Index: head/sys/fs/msdosfs/msdosfs_vnops.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_vnops.c (revision 35822) +++ head/sys/fs/msdosfs/msdosfs_vnops.c (revision 35823) @@ -1,1987 +1,1973 @@ -/* $Id: msdosfs_vnops.c,v 1.66 1998/03/20 02:33:42 kato Exp $ */ +/* $Id: msdosfs_vnops.c,v 1.67 1998/03/26 20:52:58 phk Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. 
* * October 1992 */ #include #include #include #include /* defines plimit structure in proc struct */ #include #include #include #include #include #include #include #include /* XXX */ /* defines v_rdev */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for MSDOSFS vnode operations */ static int msdosfs_create __P((struct vop_create_args *)); static int msdosfs_mknod __P((struct vop_mknod_args *)); static int msdosfs_close __P((struct vop_close_args *)); static int msdosfs_access __P((struct vop_access_args *)); static int msdosfs_getattr __P((struct vop_getattr_args *)); static int msdosfs_setattr __P((struct vop_setattr_args *)); static int msdosfs_read __P((struct vop_read_args *)); static int msdosfs_write __P((struct vop_write_args *)); static int msdosfs_fsync __P((struct vop_fsync_args *)); static int msdosfs_remove __P((struct vop_remove_args *)); static int msdosfs_link __P((struct vop_link_args *)); static int msdosfs_rename __P((struct vop_rename_args *)); static int msdosfs_mkdir __P((struct vop_mkdir_args *)); static int msdosfs_rmdir __P((struct vop_rmdir_args *)); static int msdosfs_symlink __P((struct vop_symlink_args *)); static int msdosfs_readdir __P((struct vop_readdir_args *)); static int msdosfs_abortop __P((struct vop_abortop_args *)); static int msdosfs_bmap __P((struct vop_bmap_args *)); static int msdosfs_strategy __P((struct vop_strategy_args *)); static int msdosfs_print __P((struct vop_print_args *)); static int msdosfs_pathconf __P((struct vop_pathconf_args *ap)); static int msdosfs_getpages __P((struct vop_getpages_args *)); static int msdosfs_putpages __P((struct vop_putpages_args *)); /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. Since we * must update directory entries without the benefit of having the vnode * for the directory we must use the vnode for the filesystem. This means * that when a directory is actually read/written (via read, write, or * readdir, or seek) we must use the vnode for the filesystem instead of * the vnode for the directory as would happen in ufs. This is to insure we * retreive the correct block from the buffer cache since the hash value is * based upon the vnode address and the desired block number. */ /* * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. We must also free * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or * only if the SAVESTART bit in cn_flags is clear on success. */ static int msdosfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct denode ndirent; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct timespec ts; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap); #endif /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. 
*/ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad; } /* * Create a directory entry for the file, then call createde() to * have it installed. NOTE: DOS files are always executable. We * use the absence of the owner write bit to make the file * readonly. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_create: no name"); #endif bzero(&ndirent, sizeof(ndirent)); error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = (ap->a_vap->va_mode & VWRITE) ? ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY; ndirent.de_StartCluster = 0; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; ndirent.de_pmp = pdep->de_pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); *ap->a_vpp = DETOV(dep); return (0); bad: zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); return (error); } static int msdosfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { switch (ap->a_vap->va_type) { case VDIR: return (msdosfs_mkdir((struct vop_mkdir_args *)ap)); break; case VREG: return (msdosfs_create((struct vop_create_args *)ap)); break; default: zfree(namei_zone, ap->a_cnp->cn_pnbuf); - vput(ap->a_dvp); return (EINVAL); } /* NOTREACHED */ } static int msdosfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct timespec ts; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); } simple_unlock(&vp->v_interlock); return 0; } static int msdosfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; mode_t mask, file_mode, mode = ap->a_mode; register gid_t *gp; int i; file_mode = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH) | ((dep->de_Attributes & ATTR_READONLY) ? 0 : (S_IWUSR|S_IWGRP|S_IWOTH)); file_mode &= pmp->pm_mask; /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; } } /* User id 0 always gets access. */ if (cred->cr_uid == 0) return 0; mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == pmp->pm_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (pmp->pm_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return (file_mode & mask) == mask ? 
0 : EACCES; } static int msdosfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { u_int cn; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; mode_t mode; struct timespec ts; u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); u_long fileid; getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); vap->va_fsid = dep->de_dev; /* * The following computation of the fileid must be the same as that * used in msdosfs_readdir() to compute d_fileno. If not, pwd * doesn't work. */ if (dep->de_Attributes & ATTR_DIRECTORY) { fileid = cntobn(pmp, dep->de_StartCluster) * dirsperblk; if (dep->de_StartCluster == MSDOSFSROOT) fileid = 1; } else { fileid = cntobn(pmp, dep->de_dirclust) * dirsperblk; if (dep->de_dirclust == MSDOSFSROOT) fileid = roottobn(pmp, 0) * dirsperblk; fileid += dep->de_diroffset / sizeof(struct direntry); } vap->va_fileid = fileid; if ((dep->de_Attributes & ATTR_READONLY) == 0) mode = S_IRWXU|S_IRWXG|S_IRWXO; else mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; vap->va_mode = mode & pmp->pm_mask; vap->va_uid = pmp->pm_uid; vap->va_gid = pmp->pm_gid; vap->va_nlink = 1; vap->va_rdev = 0; vap->va_size = dep->de_FileSize; dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime); if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) { dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime); dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun, &vap->va_ctime); } else { vap->va_atime = vap->va_mtime; vap->va_ctime = vap->va_mtime; } vap->va_flags = 0; if ((dep->de_Attributes & ATTR_ARCHIVE) == 0) vap->va_flags |= SF_ARCHIVED; vap->va_gen = 0; vap->va_blocksize = pmp->pm_bpcluster; vap->va_bytes = (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask; vap->va_type = ap->a_vp->v_type; vap->va_filerev = dep->de_modrev; return (0); } static int msdosfs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; int error = 0; #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n", ap->a_vp, vap, cred, ap->a_p); #endif /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): returning EINVAL\n"); printf(" va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n", vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid); printf(" va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n", vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen); printf(" va_uid %x, va_gid %x\n", vap->va_uid, vap->va_gid); #endif return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser(cred, &ap->a_p->p_acflag))) return (error); /* * We are very inconsistent about handling unsupported * attributes. We ignored the the access time and the * read and execute bits. We were strict for the other * attributes. * * Here we are strict, stricter than ufs in not allowing * users to attempt to set SF_SETTABLE bits or anyone to * set unsupported bits. 
However, we ignore attempts to * set ATTR_ARCHIVE for directories `cp -pr' from a more * sensible file system attempts it a lot. */ if (cred->cr_uid != 0) { if (vap->va_flags & SF_SETTABLE) return EPERM; } if (vap->va_flags & ~SF_ARCHIVED) return EINVAL; if (vap->va_flags & SF_ARCHIVED) dep->de_Attributes &= ~ATTR_ARCHIVE; else if (!(dep->de_Attributes & ATTR_DIRECTORY)) dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { uid_t uid; gid_t gid; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); uid = vap->va_uid; if (uid == (uid_t)VNOVAL) uid = pmp->pm_uid; gid = vap->va_gid; if (gid == (gid_t)VNOVAL) gid = pmp->pm_gid; if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid || (gid != pmp->pm_gid && !groupmember(gid, cred))) && (error = suser(cred, &ap->a_p->p_acflag))) return error; if (uid != pmp->pm_uid || gid != pmp->pm_gid) return EINVAL; } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; } error = detrunc(dep, vap->va_size, 0, cred, ap->a_p); if (error) return error; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser(cred, &ap->a_p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_p)))) return (error); if (vp->v_type != VDIR) { if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 && vap->va_atime.tv_sec != VNOVAL) unix2dostime(&vap->va_atime, &dep->de_ADate, NULL, NULL); if (vap->va_mtime.tv_sec != VNOVAL) unix2dostime(&vap->va_mtime, &dep->de_MDate, &dep->de_MTime, NULL); dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } } /* * DOS files only have the ability to have their writability * attribute set, so we use the owner write bit to set the readonly * attribute. */ if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser(cred, &ap->a_p->p_acflag))) return (error); if (vp->v_type != VDIR) { /* We ignore the read and execute bits. */ if (vap->va_mode & VWRITE) dep->de_Attributes &= ~ATTR_READONLY; else dep->de_Attributes |= ATTR_READONLY; dep->de_flag |= DE_MODIFIED; } } return (deupdat(dep, 1)); } static int msdosfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error = 0; int diff; int blsize; int isadir; long n; long on; daddr_t lbn; daddr_t rablock; int rasize; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; /* * If they didn't ask for any data, then we are done. 
*/ if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); isadir = dep->de_Attributes & ATTR_DIRECTORY; do { lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid); diff = dep->de_FileSize - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; /* convert cluster # to block # if a directory */ if (isadir) { error = pcbmap(dep, lbn, &lbn, 0, &blsize); if (error) return (error); } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); } else { rablock = lbn + 1; if (vp->v_lastr + 1 == lbn && de_cn2off(pmp, rablock) < dep->de_FileSize) { rasize = pmp->pm_bpcluster; error = breadn(vp, lbn, pmp->pm_bpcluster, &rablock, &rasize, 1, NOCRED, &bp); } else error = bread(vp, lbn, pmp->pm_bpcluster, NOCRED, &bp); vp->v_lastr = lbn; } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove(bp->b_data + on, (int) n, uio); if (!isadir) dep->de_flag |= DE_ACCESS; brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); } /* * Write data to a file or directory. */ static int msdosfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int n; int croffset; int resid; u_long osize; int error = 0; u_long count; daddr_t bn, lastcn; struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = ap->a_vp; struct vnode *thisvp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; #ifdef MSDOSFS_DEBUG printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n", vp, uio, ioflag, cred); printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n", dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = dep->de_FileSize; thisvp = vp; break; case VDIR: return EISDIR; default: panic("msdosfs_write(): bad file type"); } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * If they've exceeded their filesize limit, tell them about it. */ if (p && ((uio->uio_offset + uio->uio_resid) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { psignal(p, SIGXFSZ); return (EFBIG); } /* * If the offset we are starting the write at is beyond the end of * the file, then they've done a seek. Unix filesystems allow * files with holes in them, DOS doesn't so we must fill the hole * with zeroed blocks. */ if (uio->uio_offset > dep->de_FileSize) { error = deextend(dep, uio->uio_offset, cred); if (error) return (error); } /* * Remember some values in case the write fails. */ resid = uio->uio_resid; osize = dep->de_FileSize; /* * If we write beyond the end of the file, extend it to its ultimate * size ahead of the time to hopefully get a contiguous area. 
*/ if (uio->uio_offset + resid > osize) { count = de_clcount(pmp, uio->uio_offset + resid) - de_clcount(pmp, osize); error = extendfile(dep, count, NULL, NULL, 0); if (error && (error != ENOSPC || (ioflag & IO_UNIT))) goto errexit; lastcn = dep->de_fc[FC_LASTFC].fc_frcn; } else lastcn = de_clcount(pmp, osize) - 1; do { if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } croffset = uio->uio_offset & pmp->pm_crbomask; n = min(uio->uio_resid, pmp->pm_bpcluster - croffset); if (uio->uio_offset + n > dep->de_FileSize) { dep->de_FileSize = uio->uio_offset + n; /* The object size needs to be set before buffer is allocated */ vnode_pager_setsize(vp, dep->de_FileSize); } bn = de_cluster(pmp, uio->uio_offset); if ((uio->uio_offset & pmp->pm_crbomask) == 0 && (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > de_cluster(pmp, uio->uio_offset) || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { /* * If either the whole cluster gets written, * or we write the cluster from its start beyond EOF, * then no need to read data from disk. */ bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); clrbuf(bp); /* * Do the bmap now, since pcbmap needs buffers * for the fat table. (see msdosfs_strategy) */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; } if (bp->b_blkno == -1) { brelse(bp); if (!error) error = EIO; /* XXX */ break; } } else { /* * The block we need to write into exists, so read it in. */ error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp); if (error) { brelse(bp); break; } } /* * Should these vnode_pager_* functions be done on dir * files? */ /* * Copy the data from user space into the buf header. */ error = uiomove(bp->b_data + croffset, n, uio); /* * If they want this synchronous then write it and wait for * it. Otherwise, if on a cluster boundary write it * asynchronously so we can move on to the next block * without delay. Otherwise do a delayed write because we * may want to write somemore into the block later. */ if (ioflag & IO_SYNC) (void) bwrite(bp); else if (n + croffset == pmp->pm_bpcluster) bawrite(bp); else bdwrite(bp); dep->de_flag |= DE_UPDATE; } while (error == 0 && uio->uio_resid > 0); /* * If the write failed and they want us to, truncate the file back * to the size it was before the write was attempted. */ errexit: if (error) { if (ioflag & IO_UNIT) { detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } else { detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL); if (uio->uio_resid != resid) error = 0; } } else if (ioflag & IO_SYNC) error = deupdat(dep, 1); return (error); } /* * Flush the blocks of a file to disk. * * This function is worthless for vnodes that represent directories. Maybe we * could just do a sync if they try an fsync on a directory file. */ static int msdosfs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; int s; struct buf *bp, *nbp; /* * Flush all dirty buffers associated with a vnode. 
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("msdosfs_fsync: not dirty"); bremfree(bp); bp->b_flags |= B_BUSY; splx(s); (void) bwrite(bp); goto loop; } while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("msdosfs_fsync: dirty", vp); goto loop; } #endif splx(s); return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT)); } static int msdosfs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error; struct denode *dep = VTODE(ap->a_vp); struct denode *ddep = VTODE(ap->a_dvp); + int error; if (ap->a_vp->v_type == VDIR) error = EPERM; else error = removede(ddep, dep); #ifdef MSDOSFS_DEBUG printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount); #endif - if (ddep == dep) - vrele(ap->a_vp); - else - vput(ap->a_vp); /* causes msdosfs_inactive() to be called - * via vrele() */ - vput(ap->a_dvp); return (error); } /* * DOS filesystems don't know what links are. But since we already called * msdosfs_lookup() with create and lockparent, the parent is locked so we * have to free it before we return the error. */ static int msdosfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { VOP_ABORTOP(ap->a_tdvp, ap->a_cnp); - vput(ap->a_tdvp); - return EOPNOTSUPP; + return (EOPNOTSUPP); } /* * Renames on files require moving the denode to a new hash queue since the * denode's location is used to compute which hash queue to put the file * in. Unless it is a rename in place. For example "mv a b". * * What follows is the basic algorithm: * * if (file move) { * if (dest file exists) { * remove dest file * } * if (dest and src in same directory) { * rewrite name in existing directory slot * } else { * write new entry in dest directory * update offset and dirclust in denode * move denode to new hash chain * clear old directory entry * } * } else { * directory move * if (dest directory exists) { * if (dest is not empty) { * return ENOTEMPTY * } * remove dest directory * } * if (dest and src in same directory) { * rewrite name in existing entry * } else { * be sure dest is not a child of src directory * write entry in dest directory * update "." and ".." in moved directory * clear old directory entry for moved directory * } * } * * On entry: * source's parent directory is unlocked * source file or directory is unlocked * destination's parent directory is locked * destination file or directory is locked if it exists * * On exit: * all denodes should be released * * Notes: * I'm not sure how the memory containing the pathnames pointed at by the * componentname structures is freed, there may be some memory bleeding * for each rename done. 
*/ static int msdosfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct denode *ip, *xp, *dp, *zp; u_char toname[11], oldname[11]; u_long from_diroffset, to_diroffset; u_char to_count; int doingdirectory = 0, newparent = 0; int error; u_long cn; daddr_t bn; struct denode *fddep; /* from file's parent directory */ struct denode *fdep; /* from file or directory */ struct denode *tddep; /* to file's parent directory */ struct denode *tdep; /* to file or directory */ struct msdosfsmount *pmp; struct direntry *dotdotp; struct buf *bp; fddep = VTODE(ap->a_fdvp); fdep = VTODE(ap->a_fvp); tddep = VTODE(ap->a_tdvp); tdep = tvp ? VTODE(tvp) : NULL; pmp = fddep->de_pmp; pmp = VFSTOMSDOSFS(fdvp->v_mount); #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("msdosfs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); return (error); } /* * If source and dest are the same, do nothing. */ if (tvp == fvp) { error = 0; goto abortit; } error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); if (error) goto abortit; dp = VTODE(fdvp); ip = VTODE(fvp); /* * Be sure we are not renaming ".", "..", or an alias of ".". This * leads to a crippled directory tree. It's pretty tough to do a * "ls" or "pwd" with the "." directory entry missing, and "cd .." * doesn't work if the ".." entry is missing. */ if (ip->de_Attributes & ATTR_DIRECTORY) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->de_flag & DE_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->de_flag |= DE_RENAME; doingdirectory++; } /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; /* * Remember direntry place to use for destination */ to_diroffset = dp->de_fndoffset; to_count = dp->de_fndcnt; /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to doscheckpath(). 
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster) newparent = 1; vrele(fdvp); if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); /* * doscheckpath() vput()'s dp, * so we have to do a relookup afterwards */ error = doscheckpath(ip, dp); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost to startdir"); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; } if (xp != NULL) { /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (xp->de_Attributes & ATTR_DIRECTORY) { if (!dosdirempty(xp)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = removede(dp, xp); if (error) goto bad; vput(tvp); xp = NULL; } /* * Convert the filename in tcnp into a dos filename. We copy this * into the denode and directory entry for the destination * file/directory. */ error = uniqdosname(VTODE(tdvp), tcnp, toname); if (error) goto abortit; /* * Since from wasn't locked at various places above, * have to do a relookup here. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost from startdir"); if (!newparent) VOP_UNLOCK(tdvp, 0, p); (void) relookup(fdvp, &fvp, fcnp); if (fvp == NULL) { /* * From name has disappeared. */ if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); if (newparent) VOP_UNLOCK(tdvp, 0, p); vrele(tdvp); return 0; } xp = VTODE(fvp); zp = VTODE(fdvp); from_diroffset = zp->de_fndoffset; /* * Ensure that the directory entry still exists and has not * changed till now. If the source is a file the entry may * have been unlinked or renamed. In either case there is * no further work to be done. If the source is a directory * then it cannot have been rmdir'ed or renamed; this is * prohibited by the DE_RENAME flag. */ if (xp != ip) { if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); VOP_UNLOCK(fvp, 0, p); if (newparent) VOP_UNLOCK(fdvp, 0, p); xp = NULL; } else { vrele(fvp); xp = NULL; /* * First write a new entry in the destination * directory and mark the entry in the source directory * as deleted. Then move the denode to the correct hash * chain for its new location in the filesystem. And, if * we moved a directory, then update its .. entry to point * to the new parent directory. 
*/ bcopy(ip->de_Name, oldname, 11); bcopy(toname, ip->de_Name, 11); /* update denode */ dp->de_fndoffset = to_diroffset; dp->de_fndcnt = to_count; error = createde(ip, dp, (struct denode **)0, tcnp); if (error) { bcopy(oldname, ip->de_Name, 11); if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } ip->de_refcnt++; zp->de_fndoffset = from_diroffset; error = removede(zp, ip); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (!doingdirectory) { error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0, &ip->de_dirclust, 0); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (ip->de_dirclust != MSDOSFSROOT) ip->de_diroffset = to_diroffset & pmp->pm_crbomask; } reinsert(ip); if (newparent) VOP_UNLOCK(fdvp, 0, p); } /* * If we moved a directory to a new parent directory, then we must * fixup the ".." entry in the moved directory. */ if (doingdirectory && newparent) { cn = ip->de_StartCluster; if (cn == MSDOSFSROOT) { /* this should never happen */ panic("msdosfs_rename(): updating .. in root directory?"); } else bn = cntobn(pmp, cn); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, NOCRED, &bp); if (error) { /* XXX should really panic here, fs is corrupt */ brelse(bp); VOP_UNLOCK(fvp, 0, p); goto bad; } dotdotp = (struct direntry *)bp->b_data + 1; putushort(dotdotp->deStartCluster, dp->de_StartCluster); if (FAT32(pmp)) putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16); error = bwrite(bp); if (error) { /* XXX should really panic here, fs is corrupt */ VOP_UNLOCK(fvp, 0, p); goto bad; } } VOP_UNLOCK(fvp, 0, p); bad: if (xp) vput(tvp); vput(tdvp); out: ip->de_flag &= ~DE_RENAME; vrele(fdvp); vrele(fvp); return (error); } static struct { struct direntry dot; struct direntry dotdot; } dosdirtemplate = { { ". ", " ", /* the . entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ }, { ".. ", " ", /* the .. entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ } }; static int msdosfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struvt vnode **a_vpp; struvt componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; - struct denode ndirent; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); - int error; - int bn; - u_long newcluster, pcl; struct direntry *denp; struct msdosfsmount *pmp = pdep->de_pmp; struct buf *bp; + u_long newcluster, pcl; + int bn; + int error; + struct denode ndirent; struct timespec ts; /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad2; } /* * Allocate a cluster to hold the about to be created directory. 
*/ error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL); if (error) goto bad2; bzero(&ndirent, sizeof(ndirent)); ndirent.de_pmp = pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); /* * Now fill the cluster with the "." and ".." entries. And write * the cluster to disk. This way it is there for the parent * directory to be pointing at if there were a crash. */ bn = cntobn(pmp, newcluster); /* always succeeds */ bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0); bzero(bp->b_data, pmp->pm_bpcluster); bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate); denp = (struct direntry *)bp->b_data; putushort(denp[0].deStartCluster, newcluster); putushort(denp[0].deCDate, ndirent.de_CDate); putushort(denp[0].deCTime, ndirent.de_CTime); denp[0].deCHundredth = ndirent.de_CHun; putushort(denp[0].deADate, ndirent.de_ADate); putushort(denp[0].deMDate, ndirent.de_MDate); putushort(denp[0].deMTime, ndirent.de_MTime); pcl = pdep->de_StartCluster; if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) pcl = 0; putushort(denp[1].deStartCluster, pcl); putushort(denp[1].deCDate, ndirent.de_CDate); putushort(denp[1].deCTime, ndirent.de_CTime); denp[1].deCHundredth = ndirent.de_CHun; putushort(denp[1].deADate, ndirent.de_ADate); putushort(denp[1].deMDate, ndirent.de_MDate); putushort(denp[1].deMTime, ndirent.de_MTime); if (FAT32(pmp)) { putushort(denp[0].deHighClust, newcluster >> 16); putushort(denp[1].deHighClust, pdep->de_StartCluster >> 16); } error = bwrite(bp); if (error) goto bad; /* * Now build up a directory entry pointing to the newly allocated * cluster. This will be written to an empty slot in the parent * directory. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_mkdir: no name"); #endif error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = ATTR_DIRECTORY; ndirent.de_StartCluster = newcluster; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); *ap->a_vpp = DETOV(dep); return (0); bad: clusterfree(pmp, newcluster, NULL); bad2: zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); return (error); } static int msdosfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct denode *ip, *dp; + struct proc *p = cnp->cn_proc; int error; ip = VTODE(vp); dp = VTODE(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) { error = ENOTEMPTY; goto out; } /* * Delete the entry from the directory. For dos filesystems this * gets rid of the directory entry on disk, the in memory copy * still exists but the de_refcnt is <= 0. This prevents it from * being found by deget(). When the vput() on dep is done we give * up access and eventually msdosfs_reclaim() will be called which * will remove it from the denode cache. */ error = removede(dp, ip); if (error) goto out; /* * This is where we decrement the link count in the parent * directory. 
Since dos filesystems don't do this we just purge - * the name cache and let go of the parent directory denode. + * the name cache. */ cache_purge(dvp); - vput(dvp); - dvp = NULL; + VOP_UNLOCK(dvp, 0, p); /* * Truncate the directory that is being deleted. */ - error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, cnp->cn_proc); + error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, p); cache_purge(vp); + + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); out: - if (dvp) - vput(dvp); - vput(vp); return (error); } /* * DOS filesystems don't know what symlinks are. */ static int msdosfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { zfree(namei_zone, ap->a_cnp->cn_pnbuf); /* VOP_ABORTOP(ap->a_dvp, ap->a_cnp); ??? */ - vput(ap->a_dvp); return (EOPNOTSUPP); } static int msdosfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { int error = 0; int diff; long n; int blsize; long on; u_long cn; u_long fileno; u_long dirsperblk; long bias = 0; daddr_t bn, lbn; struct buf *bp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct direntry *dentp; struct dirent dirbuf; struct uio *uio = ap->a_uio; u_long *cookies = NULL; int ncookies = 0; off_t offset, off; int chksum = -1; #ifdef MSDOSFS_DEBUG printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the the filesystem vnode, and hence can * retrieve the wrong block from the buffer cache for a plain file. * So, fail attempts to readdir() on a plain file. */ if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) return (ENOTDIR); /* * To be safe, initialize dirbuf */ bzero(dirbuf.d_name, sizeof(dirbuf.d_name)); /* * If the user buffer is smaller than the size of one dos directory * entry or the file offset is not a multiple of the size of a * directory entry, then we fail the read. */ off = offset = uio->uio_offset; if (uio->uio_resid < sizeof(struct direntry) || (offset & (sizeof(struct direntry) - 1))) return (EINVAL); if (ap->a_ncookies) { ncookies = uio->uio_resid / 16; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); *ap->a_cookies = cookies; *ap->a_ncookies = ncookies; } dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); /* * If they are reading from the root directory then, we simulate * the . and .. entries since these don't exist in the root * directory. We also set the offset bias to make up for having to * simulate these entries. By this I mean that at file offset 64 we * read the first entry in the root directory that lives on disk. */ if (dep->de_StartCluster == MSDOSFSROOT || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) { #if 0 printf("msdosfs_readdir(): going after . or .. 
in root dir, offset %d\n", offset); #endif bias = 2 * sizeof(struct direntry); if (offset < bias) { for (n = (int)offset / sizeof(struct direntry); n < 2; n++) { if (FAT32(pmp)) dirbuf.d_fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else dirbuf.d_fileno = 1; dirbuf.d_type = DT_DIR; switch (n) { case 0: dirbuf.d_namlen = 1; strcpy(dirbuf.d_name, "."); break; case 1: dirbuf.d_namlen = 2; strcpy(dirbuf.d_name, ".."); break; } dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) goto out; error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) goto out; offset += sizeof(struct direntry); off = offset; if (cookies) { *cookies++ = offset; if (--ncookies <= 0) goto out; } } } } off = offset; while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, blsize - bp->b_resid); /* * Convert from dos directory entries to fs-independent * directory entries. */ for (dentp = (struct direntry *)(bp->b_data + on); (char *)dentp < bp->b_data + on + n; dentp++, offset += sizeof(struct direntry)) { #if 0 printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n", dentp, prev, crnt, dentp->deName[0], dentp->deAttributes); #endif /* * If this is an unused entry, we can stop. */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); goto out; } /* * Skip deleted entries. */ if (dentp->deName[0] == SLOT_DELETED) { chksum = -1; continue; } /* * Handle Win95 long directory entries */ if (dentp->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = win2unixfn((struct winentry *)dentp, &dirbuf, chksum, pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2w); continue; } /* * Skip volume labels */ if (dentp->deAttributes & ATTR_VOLUME) { chksum = -1; continue; } /* * This computation of d_fileno must match * the computation of va_fileid in * msdosfs_getattr. 
*/ if (dentp->deAttributes & ATTR_DIRECTORY) { fileno = getushort(dentp->deStartCluster); if (FAT32(pmp)) fileno |= getushort(dentp->deHighClust) << 16; /* if this is the root directory */ if (fileno == MSDOSFSROOT) if (FAT32(pmp)) fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else fileno = 1; else fileno = cntobn(pmp, fileno) * dirsperblk; dirbuf.d_fileno = fileno; dirbuf.d_type = DT_DIR; } else { dirbuf.d_fileno = offset / sizeof(struct direntry); dirbuf.d_type = DT_REG; } if (chksum != winChksum(dentp->deName)) dirbuf.d_namlen = dos2unixfn(dentp->deName, (u_char *)dirbuf.d_name, pmp->pm_flags & MSDOSFSMNT_SHORTNAME, pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_d2u, pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_ul); else dirbuf.d_name[dirbuf.d_namlen] = 0; chksum = -1; dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) { brelse(bp); goto out; } error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) { brelse(bp); goto out; } if (cookies) { *cookies++ = offset + sizeof(struct direntry); if (--ncookies <= 0) { brelse(bp); goto out; } } off = offset + sizeof(struct direntry); } brelse(bp); } out: /* Subtract unused cookies */ if (ap->a_ncookies) *ap->a_ncookies -= ncookies; uio->uio_offset = off; /* * Set the eofflag (NFS uses it) */ if (ap->a_eofflag) if (dep->de_FileSize - (offset - bias) <= 0) *ap->a_eofflag = 1; else *ap->a_eofflag = 0; return (error); } static int msdosfs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * vp - address of vnode file the file * bn - which cluster we are interested in mapping to a filesystem block number. * vpp - returns the vnode for the block special file holding the filesystem * containing the file of interest * bnp - address of where to return the filesystem relative block number */ static int msdosfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; if (ap->a_bnp == NULL) return (0); if (ap->a_runp) { /* * Sequential clusters should be counted here. */ *ap->a_runp = 0; } if (ap->a_runb) { *ap->a_runb = 0; } return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); } static int msdosfs_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct denode *dep = VTODE(bp->b_vp); struct vnode *vp; int error = 0; if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR) panic("msdosfs_strategy: spec"); /* * If we don't already know the filesystem relative block number * then get it using pcbmap(). If pcbmap() returns the block * number as -1 then we've got a hole in the file. DOS filesystems * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if (bp->b_blkno == -1) { biodone(bp); return (0); } /* * Read/write the block from/to the disk that contains the desired * file block. 
*/ vp = dep->de_devvp; bp->b_dev = vp->v_rdev; VOCALL(vp->v_op, VOFFSET(vop_strategy), ap); return (0); } static int msdosfs_print(ap) struct vop_print_args /* { struct vnode *vp; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); printf( "tag VT_MSDOSFS, startcluster %d, dircluster %ld, diroffset %ld ", dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset); printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev)); lockmgr_printinfo(&dep->de_lock); printf("\n"); return (0); } static int msdosfs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } /* Global vfs data structures for msdosfs */ vop_t **msdosfs_vnodeop_p; static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) msdosfs_abortop }, { &vop_access_desc, (vop_t *) msdosfs_access }, { &vop_bmap_desc, (vop_t *) msdosfs_bmap }, { &vop_cachedlookup_desc, (vop_t *) msdosfs_lookup }, { &vop_close_desc, (vop_t *) msdosfs_close }, { &vop_create_desc, (vop_t *) msdosfs_create }, { &vop_fsync_desc, (vop_t *) msdosfs_fsync }, { &vop_getattr_desc, (vop_t *) msdosfs_getattr }, { &vop_inactive_desc, (vop_t *) msdosfs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_link_desc, (vop_t *) msdosfs_link }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_mkdir_desc, (vop_t *) msdosfs_mkdir }, { &vop_mknod_desc, (vop_t *) msdosfs_mknod }, { &vop_pathconf_desc, (vop_t *) msdosfs_pathconf }, { &vop_print_desc, (vop_t *) msdosfs_print }, { &vop_read_desc, (vop_t *) msdosfs_read }, { &vop_readdir_desc, (vop_t *) msdosfs_readdir }, { &vop_reclaim_desc, (vop_t *) msdosfs_reclaim }, { &vop_remove_desc, (vop_t *) msdosfs_remove }, { &vop_rename_desc, (vop_t *) msdosfs_rename }, { &vop_rmdir_desc, (vop_t *) msdosfs_rmdir }, { &vop_setattr_desc, (vop_t *) msdosfs_setattr }, { &vop_strategy_desc, (vop_t *) msdosfs_strategy }, { &vop_symlink_desc, (vop_t *) msdosfs_symlink }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_write_desc, (vop_t *) msdosfs_write }, { &vop_getpages_desc, (vop_t *) msdosfs_getpages }, { &vop_putpages_desc, (vop_t *) msdosfs_putpages }, { NULL, NULL } }; static struct vnodeopv_desc msdosfs_vnodeop_opv_desc = { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; VNODEOP_SET(msdosfs_vnodeop_opv_desc); Index: head/sys/fs/unionfs/union_subr.c =================================================================== --- head/sys/fs/unionfs/union_subr.c (revision 35822) +++ 
head/sys/fs/unionfs/union_subr.c (revision 35823) @@ -1,1137 +1,1140 @@ /* * Copyright (c) 1994 Jan-Simon Pendry * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 - * $Id: union_subr.c,v 1.28 1998/02/10 03:32:05 kato Exp $ + * $Id: union_subr.c,v 1.29 1998/02/26 03:23:54 kato Exp $ */ #include #include #include #include #include #include #include #include #include #include #include /* for vnode_pager_setsize */ #include #include #include extern int union_init __P((void)); /* must be power of two, otherwise change UNION_HASH() */ #define NHASH 32 /* unsigned int ... 
*/ #define UNION_HASH(u, l) \ (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1)) static LIST_HEAD(unhead, union_node) unhead[NHASH]; static int unvplock[NHASH]; static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp, int *cntp)); static int union_list_lock __P((int ix)); static void union_list_unlock __P((int ix)); static int union_relookup __P((struct union_mount *um, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct componentname *cn, char *path, int pathlen)); static void union_updatevp __P((struct union_node *un, struct vnode *uppervp, struct vnode *lowervp)); static void union_newlower __P((struct union_node *, struct vnode *)); static void union_newupper __P((struct union_node *, struct vnode *)); static int union_copyfile __P((struct vnode *, struct vnode *, struct ucred *, struct proc *)); static int union_vn_create __P((struct vnode **, struct union_node *, struct proc *)); static int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *)); int union_init() { int i; for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); bzero((caddr_t) unvplock, sizeof(unvplock)); return (0); } static int union_list_lock(ix) int ix; { if (unvplock[ix] & UN_LOCKED) { unvplock[ix] |= UN_WANT; (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); return (1); } unvplock[ix] |= UN_LOCKED; return (0); } static void union_list_unlock(ix) int ix; { unvplock[ix] &= ~UN_LOCKED; if (unvplock[ix] & UN_WANT) { unvplock[ix] &= ~UN_WANT; wakeup((caddr_t) &unvplock[ix]); } } static void union_updatevp(un, uppervp, lowervp) struct union_node *un; struct vnode *uppervp; struct vnode *lowervp; { int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); int nhash = UNION_HASH(uppervp, lowervp); int docache = (lowervp != NULLVP || uppervp != NULLVP); int lhash, uhash; /* * Ensure locking is ordered from lower to higher * to avoid deadlocks. */ if (nhash < ohash) { lhash = nhash; uhash = ohash; } else { lhash = ohash; uhash = nhash; } if (lhash != uhash) while (union_list_lock(lhash)) continue; while (union_list_lock(uhash)) continue; if (ohash != nhash || !docache) { if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } } if (ohash != nhash) union_list_unlock(ohash); if (un->un_lowervp != lowervp) { if (un->un_lowervp) { vrele(un->un_lowervp); if (un->un_path) { free(un->un_path, M_TEMP); un->un_path = 0; } if (un->un_dirvp) { vrele(un->un_dirvp); un->un_dirvp = NULLVP; } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; } if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } if (docache && (ohash != nhash)) { LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); un->un_flags |= UN_CACHED; } union_list_unlock(nhash); } static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { union_updatevp(un, un->un_uppervp, lowervp); } static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { union_updatevp(un, uppervp, un->un_lowervp); } /* * Keep track of size changes in the underlying vnodes. * If the size changes, then callback to the vm layer * giving priority to the upper layer size. 
*/ void union_newsize(vp, uppersz, lowersz) struct vnode *vp; off_t uppersz, lowersz; { struct union_node *un; off_t sz; /* only interested in regular files */ if (vp->v_type != VREG) return; un = VTOUNION(vp); sz = VNOVAL; if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { un->un_uppersz = uppersz; if (sz == VNOVAL) sz = un->un_uppersz; } if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { un->un_lowersz = lowersz; if (sz == VNOVAL) sz = un->un_lowersz; } if (sz != VNOVAL) { #ifdef UNION_DIAGNOSTIC printf("union: %s size now %ld\n", uppersz != VNOVAL ? "upper" : "lower", (long) sz); #endif vnode_pager_setsize(vp, sz); } } /* * allocate a union_node/vnode pair. the vnode is * referenced and locked. the new vnode is returned * via (vpp). (mp) is the mountpoint of the union filesystem, * (dvp) is the parent directory where the upper layer object * should exist (but doesn't) and (cnp) is the componentname * information which is partially copied to allow the upper * layer object to be created at a later time. (uppervp) * and (lowervp) reference the upper and lower layer objects * being mapped. either, but not both, can be nil. * if supplied, (uppervp) is locked. * the reference is either maintained in the new union_node * object which is allocated, or they are vrele'd. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot * be found on this list. entries on the list are * removed when the vfs reclaim entry is called. * * a single lock is kept for the entire list. this is * needed because the getnewvnode() function can block * waiting for a vnode to become free, in which case there * may be more than one process trying to get the same * vnode. this lock is only taken if we are going to * call getnewvnode, since the kernel itself is single-threaded. * * if an entry is found on the list, then call vget() to * take a reference. this is done because there may be * zero references to it and so it needs to removed from * the vnode free list. 
*/ int union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct vnode **vpp; struct mount *mp; struct vnode *undvp; /* parent union vnode */ struct vnode *dvp; /* may be null */ struct componentname *cnp; /* may be null */ struct vnode *uppervp; /* may be null */ struct vnode *lowervp; /* may be null */ int docache; { int error; struct union_node *un = 0; struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int hash; int vflag; int try; int klocked; if (uppervp == NULLVP && lowervp == NULLVP) panic("union: unidentifiable allocation"); if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { xlowervp = lowervp; lowervp = NULLVP; } /* detect the root vnode (and aliases) */ vflag = 0; if ((uppervp == um->um_uppervp) && ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { if (lowervp == NULLVP) { lowervp = um->um_lowervp; if (lowervp != NULLVP) VREF(lowervp); } vflag = VROOT; } loop: if (!docache) { un = 0; } else for (try = 0; try < 3; try++) { switch (try) { case 0: if (lowervp == NULLVP) continue; hash = UNION_HASH(uppervp, lowervp); break; case 1: if (uppervp == NULLVP) continue; hash = UNION_HASH(uppervp, NULLVP); break; case 2: if (lowervp == NULLVP) continue; hash = UNION_HASH(NULLVP, lowervp); break; } while (union_list_lock(hash)) continue; for (un = unhead[hash].lh_first; un != 0; un = un->un_cache.le_next) { if ((un->un_lowervp == lowervp || un->un_lowervp == NULLVP) && (un->un_uppervp == uppervp || un->un_uppervp == NULLVP) && (UNIONTOV(un)->v_mount == mp)) { if (vget(UNIONTOV(un), 0, cnp ? cnp->cn_proc : NULL)) { union_list_unlock(hash); goto loop; } break; } } union_list_unlock(hash); if (un) break; } if (un) { /* * Obtain a lock on the union_node. * uppervp is locked, though un->un_uppervp * may not be. this doesn't break the locking * hierarchy since in the case that un->un_uppervp * is not yet locked it will be vrele'd and replaced * with uppervp. */ if ((dvp != NULLVP) && (uppervp == dvp)) { /* * Access ``.'', so (un) will already * be locked. Since this process has * the lock on (uppervp) no other * process can hold the lock on (un). */ #ifdef DIAGNOSTIC if ((un->un_flags & UN_LOCKED) == 0) panic("union: . not locked"); else if (curproc && un->un_pid != curproc->p_pid && un->un_pid > -1 && curproc->p_pid > -1) panic("union: allocvp not lock owner"); #endif } else { if (un->un_flags & UN_LOCKED) { vrele(UNIONTOV(un)); un->un_flags |= UN_WANT; (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); goto loop; } un->un_flags |= UN_LOCKED; #ifdef DIAGNOSTIC if (curproc) un->un_pid = curproc->p_pid; else un->un_pid = -1; #endif } /* * At this point, the union_node is locked, * un->un_uppervp may not be locked, and uppervp * is locked or nil. */ /* * Save information about the upper layer. */ if (uppervp != un->un_uppervp) { union_newupper(un, uppervp); } else if (uppervp) { vrele(uppervp); } if (un->un_uppervp) { un->un_flags |= UN_ULOCK; un->un_flags &= ~UN_KLOCK; } /* * Save information about the lower layer. * This needs to keep track of pathname * and directory information which union_vn_create * might need. 
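 * Specifically, the component name is copied into un_path, its hash
 * into un_hash, and the parent directory is referenced through
 * un_dirvp; union_freevp() later releases that reference and frees
 * the copied name.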
*/ if (lowervp != un->un_lowervp) { union_newlower(un, lowervp); if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; VREF(dvp); un->un_dirvp = dvp; } } else if (lowervp) { vrele(lowervp); } *vpp = UNIONTOV(un); return (0); } if (docache) { /* * otherwise lock the vp list while we call getnewvnode * since that can block. */ hash = UNION_HASH(uppervp, lowervp); if (union_list_lock(hash)) goto loop; } error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); if (error) { if (uppervp) { if (dvp == uppervp) vrele(uppervp); else vput(uppervp); } if (lowervp) vrele(lowervp); goto out; } MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), M_TEMP, M_WAITOK); (*vpp)->v_flag |= vflag; if (uppervp) (*vpp)->v_type = uppervp->v_type; else (*vpp)->v_type = lowervp->v_type; un = VTOUNION(*vpp); un->un_vnode = *vpp; un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; un->un_pvp = undvp; if (undvp != NULLVP) VREF(undvp); un->un_dircache = 0; un->un_openl = 0; un->un_flags = UN_LOCKED; if (un->un_uppervp) un->un_flags |= UN_ULOCK; #ifdef DIAGNOSTIC if (curproc) un->un_pid = curproc->p_pid; else un->un_pid = -1; #endif if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; VREF(dvp); un->un_dirvp = dvp; } else { un->un_hash = 0; un->un_path = 0; un->un_dirvp = 0; } if (docache) { LIST_INSERT_HEAD(&unhead[hash], un, un_cache); un->un_flags |= UN_CACHED; } if (xlowervp) vrele(xlowervp); out: if (docache) union_list_unlock(hash); return (error); } int union_freevp(vp) struct vnode *vp; { struct union_node *un = VTOUNION(vp); if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } if (un->un_pvp != NULLVP) vrele(un->un_pvp); if (un->un_uppervp != NULLVP) vrele(un->un_uppervp); if (un->un_lowervp != NULLVP) vrele(un->un_lowervp); if (un->un_dirvp != NULLVP) vrele(un->un_dirvp); if (un->un_path) free(un->un_path, M_TEMP); FREE(vp->v_data, M_TEMP); vp->v_data = 0; return (0); } /* * copyfile. copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. both (fvp) * and (tvp) are locked on entry and exit. */ static int union_copyfile(fvp, tvp, cred, p) struct vnode *fvp; struct vnode *tvp; struct ucred *cred; struct proc *p; { char *buf; struct uio uio; struct iovec iov; int error = 0; /* * strategy: * allocate a buffer of size MAXBSIZE. * loop doing reads and writes, keeping track * of the current uio offset. * give up at the first sign of trouble. */ uio.uio_procp = p; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... 
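 * each pass reads up to MAXBSIZE bytes from (fvp), rewinds
 * uio_offset to the start of the chunk, and writes back exactly the
 * number of bytes the read returned; the copy stops when a read
 * returns no data or either operation fails.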
*/ do { off_t offset = uio.uio_offset; uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; error = VOP_READ(fvp, &uio, 0, cred); if (error == 0) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE - uio.uio_resid; uio.uio_offset = offset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; if (uio.uio_resid == 0) break; do { error = VOP_WRITE(tvp, &uio, 0, cred); } while ((uio.uio_resid > 0) && (error == 0)); } } while (error == 0); free(buf, M_TEMP); return (error); } /* * (un) is assumed to be locked on entry and remains * locked on exit. */ int union_copyup(un, docopy, cred, p) struct union_node *un; int docopy; struct ucred *cred; struct proc *p; { int error; struct vnode *lvp, *uvp; /* * If the user does not have read permission, the vnode should not * be copied to upper layer. */ vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p); VOP_UNLOCK(un->un_lowervp, 0, p); if (error) return (error); error = union_vn_create(&uvp, un, p); if (error) return (error); /* at this point, uppervp is locked */ union_newupper(un, uvp); un->un_flags |= UN_ULOCK; lvp = un->un_lowervp; if (docopy) { /* * XX - should not ignore errors * from VOP_CLOSE */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } #ifdef UNION_DIAGNOSTIC if (error == 0) uprintf("union: copied up %s\n", un->un_path); #endif } un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(uvp, 0, p); union_vn_close(uvp, FWRITE, cred, p); vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the * upper vnode to ensure that the filesystem keeps * its references counts right. This doesn't do * the right thing with (cred) and (FREAD) though. * Ignoring error returns is not right, either. */ if (error == 0) { int i; for (i = 0; i < un->un_openl; i++) { (void) VOP_CLOSE(lvp, FREAD, cred, p); (void) VOP_OPEN(uvp, FREAD, cred, p); } un->un_openl = 0; } return (error); } static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; struct componentname *cn; char *path; int pathlen; { int error; /* * A new componentname structure must be faked up because * there is no way to know where the upper level cnp came * from or what it is being used for. This must duplicate * some of the work done by NDINIT, some of the work done * by namei, some of the work done by lookup and some of * the work done by VOP_LOOKUP when given a CREATE flag. * Conclusion: Horrible. * * The pathname buffer will be FREEed by VOP_MKDIR. */ cn->cn_namelen = pathlen; cn->cn_pnbuf = zalloc(namei_zone); bcopy(path, cn->cn_pnbuf, cn->cn_namelen); cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; else cn->cn_cred = um->um_cred; cn->cn_nameptr = cn->cn_pnbuf; cn->cn_hash = cnp->cn_hash; cn->cn_consume = cnp->cn_consume; VREF(dvp); error = relookup(dvp, vpp, cn); if (!error) vrele(dvp); else { zfree(namei_zone, cn->cn_pnbuf); cn->cn_pnbuf = '\0'; } return (error); } /* * Create a shadow directory in the upper layer. 
* The new vnode is returned locked. * * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the shadow directory. * it is unlocked on entry and exit. * (cnp) is the componentname to be created. * (vpp) is the returned newly created shadow directory, which * is returned locked. */ int union_mkshadow(um, dvp, cnp, vpp) struct union_mount *um; struct vnode *dvp; struct componentname *cnp; struct vnode **vpp; { int error; struct vattr va; struct proc *p = cnp->cn_proc; struct componentname cn; error = union_relookup(um, dvp, vpp, cnp, &cn, cnp->cn_nameptr, cnp->cn_namelen); if (error) return (error); if (*vpp) { VOP_ABORTOP(dvp, &cn); VOP_UNLOCK(dvp, 0, p); vrele(*vpp); *vpp = NULLVP; return (EEXIST); } /* * policy: when creating the shadow directory in the * upper layer, create it owned by the user who did * the mount, group from parent directory, and mode * 777 modified by umask (ie mostly identical to the * mkdir syscall). (jsp, kb) */ VATTR_NULL(&va); va.va_type = VDIR; va.va_mode = um->um_cmode; /* VOP_LEASE: dvp is locked */ VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); + vput(dvp); return (error); } /* * Create a whiteout entry in the upper layer. * * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. * it is locked on entry and exit. * (cnp) is the componentname to be created. */ int union_mkwhiteout(um, dvp, cnp, path) struct union_mount *um; struct vnode *dvp; struct componentname *cnp; char *path; { int error; struct proc *p = cnp->cn_proc; struct vnode *wvp; struct componentname cn; VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); if (error) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (wvp) { VOP_ABORTOP(dvp, &cn); vrele(dvp); vrele(wvp); return (EEXIST); } /* VOP_LEASE: dvp is locked */ VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_WHITEOUT(dvp, &cn, CREATE); if (error) VOP_ABORTOP(dvp, &cn); vrele(dvp); return (error); } /* * union_vn_create: creates and opens a new shadow file * on the upper union layer. this function is similar * in spirit to calling vn_open but it avoids calling namei(). * the problem with calling namei is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup is told where to start. */ static int union_vn_create(vpp, un, p) struct vnode **vpp; struct union_node *un; struct proc *p; { struct vnode *vp; struct ucred *cred = p->p_ucred; struct vattr vat; struct vattr *vap = &vat; int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); int error; int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; struct componentname cn; *vpp = NULLVP; /* * Build a new componentname structure (for the same * reasons outlines in union_mkshadow). * The difference here is that the file is owned by * the current user, rather than by the person who * did the mount, since the current user needs to be * able to write the file (that's why it is being * copied in the first place). 
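 *
 * The new file's mode is UN_FILEMODE masked with the calling
 * process's umask, and relookup() is done first so that an already
 * existing name is reported as EEXIST instead of being quietly
 * reused.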
*/ cn.cn_namelen = strlen(un->un_path); cn.cn_pnbuf = zalloc(namei_zone); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); vrele(un->un_dirvp); if (vp) { VOP_ABORTOP(un->un_dirvp, &cn); if (un->un_dirvp == vp) vrele(un->un_dirvp); else vput(un->un_dirvp); vrele(vp); return (EEXIST); } /* * Good - there was no race to create the file * so go ahead and create it. The permissions * on the file will be 0666 modified by the * current user's umask. Access to the file, while * it is unioned, will require access to the top *and* * bottom files. Access when not unioned will simply * require access to the top-level file. * TODO: confirm choice of access permissions. */ VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE); - if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) + error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); + vput(un->un_dirvp); + if (error) return (error); error = VOP_OPEN(vp, fmode, cred, p); if (error) { vput(vp); return (error); } vp->v_writecount++; *vpp = vp; return (0); } static int union_vn_close(vp, fmode, cred, p) struct vnode *vp; int fmode; struct ucred *cred; struct proc *p; { if (fmode & FWRITE) --vp->v_writecount; return (VOP_CLOSE(vp, fmode, cred, p)); } void union_removed_upper(un) struct union_node *un; { struct proc *p = curproc; /* XXX */ struct vnode **vpp; /* * Do not set the uppervp to NULLVP. If lowervp is NULLVP, * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referrenced. */ #if 0 union_newupper(un, NULLVP); #endif if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); free(un->un_dircache, M_TEMP); un->un_dircache = 0; } if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } if (un->un_flags & UN_ULOCK) { un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(un->un_uppervp, 0, p); } } #if 0 struct vnode * union_lowervp(vp) struct vnode *vp; { struct union_node *un = VTOUNION(vp); if ((un->un_lowervp != NULLVP) && (vp->v_type == un->un_lowervp->v_type)) { if (vget(un->un_lowervp, 0) == 0) return (un->un_lowervp); } return (NULLVP); } #endif /* * determine whether a whiteout is needed * during a remove/rmdir operation. 
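 * a whiteout is required either when a lower layer object exists
 * that would otherwise become visible again, or when the upper
 * directory is marked OPAQUE.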
*/ int union_dowhiteout(un, cred, p) struct union_node *un; struct ucred *cred; struct proc *p; { struct vattr va; if (un->un_lowervp != NULLVP) return (1); if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 && (va.va_flags & OPAQUE)) return (1); return (0); } static void union_dircache_r(vp, vppp, cntp) struct vnode *vp; struct vnode ***vppp; int *cntp; { struct union_node *un; if (vp->v_op != union_vnodeop_p) { if (vppp) { VREF(vp); *(*vppp)++ = vp; if (--(*cntp) == 0) panic("union: dircache table too small"); } else { (*cntp)++; } return; } un = VTOUNION(vp); if (un->un_uppervp != NULLVP) union_dircache_r(un->un_uppervp, vppp, cntp); if (un->un_lowervp != NULLVP) union_dircache_r(un->un_lowervp, vppp, cntp); } struct vnode * union_dircache(vp, p) struct vnode *vp; struct proc *p; { int cnt; struct vnode *nvp; struct vnode **vpp; struct vnode **dircache; struct union_node *un; int error; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); dircache = VTOUNION(vp)->un_dircache; nvp = NULLVP; if (dircache == 0) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; dircache = (struct vnode **) malloc(cnt * sizeof(struct vnode *), M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; vpp = dircache + 1; } else { vpp = dircache; do { if (*vpp++ == VTOUNION(vp)->un_uppervp) break; } while (*vpp != NULLVP); } if (*vpp == NULLVP) goto out; vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); VREF(*vpp); error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); if (error) goto out; VTOUNION(vp)->un_dircache = 0; un = VTOUNION(nvp); un->un_dircache = dircache; out: VOP_UNLOCK(vp, 0, p); return (nvp); } Index: head/sys/fs/unionfs/union_vnops.c =================================================================== --- head/sys/fs/unionfs/union_vnops.c (revision 35822) +++ head/sys/fs/unionfs/union_vnops.c (revision 35823) @@ -1,1798 +1,1799 @@ /* * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. * Copyright (c) 1992, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95 - * $Id: union_vnops.c,v 1.55 1998/02/26 03:23:56 kato Exp $ + * $Id: union_vnops.c,v 1.56 1998/03/17 08:47:50 kato Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define FIXUP(un, p) { \ if (((un)->un_flags & UN_ULOCK) == 0) { \ union_fixup(un, p); \ } \ } static int union_abortop __P((struct vop_abortop_args *ap)); static int union_access __P((struct vop_access_args *ap)); static int union_advlock __P((struct vop_advlock_args *ap)); static int union_bmap __P((struct vop_bmap_args *ap)); static int union_close __P((struct vop_close_args *ap)); static int union_create __P((struct vop_create_args *ap)); static void union_fixup __P((struct union_node *un, struct proc *p)); static int union_fsync __P((struct vop_fsync_args *ap)); static int union_getattr __P((struct vop_getattr_args *ap)); static int union_inactive __P((struct vop_inactive_args *ap)); static int union_ioctl __P((struct vop_ioctl_args *ap)); static int union_islocked __P((struct vop_islocked_args *ap)); static int union_lease __P((struct vop_lease_args *ap)); static int union_link __P((struct vop_link_args *ap)); static int union_lock __P((struct vop_lock_args *ap)); static int union_lookup __P((struct vop_lookup_args *ap)); static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp, struct componentname *cnp)); static int union_mkdir __P((struct vop_mkdir_args *ap)); static int union_mknod __P((struct vop_mknod_args *ap)); static int union_mmap __P((struct vop_mmap_args *ap)); static int union_open __P((struct vop_open_args *ap)); static int union_pathconf __P((struct vop_pathconf_args *ap)); static int union_print __P((struct vop_print_args *ap)); static int union_read __P((struct vop_read_args *ap)); static int union_readdir __P((struct vop_readdir_args *ap)); static int union_readlink __P((struct vop_readlink_args *ap)); static int union_reclaim __P((struct vop_reclaim_args *ap)); static int union_remove __P((struct vop_remove_args *ap)); static int union_rename __P((struct vop_rename_args *ap)); static int union_revoke __P((struct vop_revoke_args *ap)); static int union_rmdir __P((struct vop_rmdir_args *ap)); static int union_poll __P((struct vop_poll_args *ap)); static int union_setattr __P((struct vop_setattr_args *ap)); static int union_strategy __P((struct vop_strategy_args *ap)); static int union_symlink __P((struct vop_symlink_args *ap)); static int union_unlock __P((struct vop_unlock_args *ap)); static int union_whiteout __P((struct vop_whiteout_args *ap)); static int union_write __P((struct vop_read_args *ap)); static void union_fixup(un, p) struct union_node *un; struct proc *p; { vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; } static int union_lookup1(udvp, dvpp, vpp, cnp) struct vnode *udvp; struct vnode **dvpp; struct vnode **vpp; struct componentname *cnp; { int error; struct proc *p = cnp->cn_proc; struct vnode 
*tdvp; struct vnode *dvp; struct mount *mp; dvp = *dvpp; /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what * lookup would do by stepping back down the mount * hierarchy. */ if (cnp->cn_flags & ISDOTDOT) { while ((dvp != udvp) && (dvp->v_flag & VROOT)) { /* * Don't do the NOCROSSMOUNT check * at this level. By definition, * union fs deals with namespaces, not * filesystems. */ tdvp = dvp; *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; vput(tdvp); VREF(dvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } error = VOP_LOOKUP(dvp, &tdvp, cnp); if (error) return (error); /* * The parent directory will have been unlocked, unless lookup * found the last component. In which case, re-lock the node * here to allow it to be unlocked again (phew) in union_lookup. */ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); dvp = tdvp; /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). */ while (dvp != udvp && (dvp->v_type == VDIR) && (mp = dvp->v_mountedhere)) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdvp); vfs_unbusy(mp, p); if (error) { vput(dvp); return (error); } vput(dvp); dvp = tdvp; } *vpp = dvp; return (0); } static int union_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { int error; int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; struct vnode *dvp = ap->a_dvp; struct union_node *dun = VTOUNION(dvp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int lockparent = cnp->cn_flags & LOCKPARENT; struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); struct ucred *saved_cred; int iswhiteout; struct vattr va; /* * Disallow write attemps to the filesystem mounted read-only. */ if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); #ifdef notyet if (cnp->cn_namelen == 3 && cnp->cn_nameptr[2] == '.' && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') { dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); if (dvp == NULLVP) return (ENOENT); VREF(dvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); if (!lockparent || !(cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(ap->a_dvp, 0, p); return (0); } #endif cnp->cn_flags |= LOCKPARENT; upperdvp = dun->un_uppervp; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; if (cnp->cn_flags & ISDOTDOT) { if (upperdvp != NULL) VREF(upperdvp); if (lowerdvp != NULL) VREF(lowerdvp); } /* * do the lookup in the upper level. * if that level comsumes additional pathnames, * then assume that something special is going * on and just return that vnode. */ if (upperdvp != NULLVP) { FIXUP(dun, p); /* * If we're doing `..' in the underlying filesystem, * we must drop our lock on the union node before * going up the tree in the lower file system--if we block * on the lowervp lock, and that's held by someone else * coming down the tree and who's waiting for our lock, * we would be hosed. */ if (cnp->cn_flags & ISDOTDOT) { /* retain lock on underlying VP: */ dun->un_flags |= UN_KLOCK; VOP_UNLOCK(dvp, 0, p); } uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); /* * Disallow write attemps to the filesystem mounted read-only. 
*/ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; return (EROFS); } if (cnp->cn_flags & ISDOTDOT) { if (dun->un_uppervp == upperdvp) { /* * We got the underlying bugger back locked... * now take back the union node lock. Since we * hold the uppervp lock, we can diddle union * locking flags at will. :) */ dun->un_flags |= UN_ULOCK; } /* * If upperdvp got swapped out, it means we did * some mount point magic, and we do not have * dun->un_uppervp locked currently--so we get it * locked here (don't set the UN_ULOCK flag). */ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } /*if (uppervp == upperdvp) dun->un_flags |= UN_KLOCK;*/ if (cnp->cn_consume != 0) { *ap->a_vpp = uppervp; if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; error = uerror; goto out; } if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; } else if (lowerdvp != NULLVP) { lerror = VOP_GETATTR(upperdvp, &va, cnp->cn_cred, cnp->cn_proc); if (lerror == 0 && (va.va_flags & OPAQUE)) iswhiteout = 1; } } } else { uerror = ENOENT; } /* * in a similar way to the upper layer, do the lookup * in the lower layer. this time, if there is some * component magic going on, then vput whatever we got * back from the upper layer and return the lower vnode * instead. */ if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Only do a LOOKUP on the bottom node, since * we won't be making changes to it anyway. */ nameiop = cnp->cn_nameiop; cnp->cn_nameiop = LOOKUP; if (um->um_op == UNMNT_BELOW) { saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. */ lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; if (lowervp != lowerdvp) VOP_UNLOCK(lowerdvp, 0, p); if (cnp->cn_consume != 0 || lerror == EACCES) { if (lerror == EACCES) lowervp = NULLVP; if (uppervp != NULLVP) { if (uppervp == upperdvp) vrele(uppervp); else vput(uppervp); uppervp = NULLVP; } *ap->a_vpp = lowervp; if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; error = lerror; goto out; } } else { lerror = ENOENT; if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { lowervp = LOWERVP(dun->un_pvp); if (lowervp != NULLVP) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; } } } if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; /* * at this point, we have uerror and lerror indicating * possible errors with the lookups in the upper and lower * layers. additionally, uppervp and lowervp are (locked) * references to existing vnodes in the upper and lower layers. * * there are now three cases to consider. * 1. if both layers returned an error, then return whatever * error the upper layer generated. * * 2. if the top layer failed and the bottom layer succeeded * then two subcases occur. * a. the bottom vnode is not a directory, in which * case just return a new union vnode referencing * an empty top layer and the existing bottom layer. * b. the bottom vnode is a directory, in which case * create a new directory in the top-level and * continue as in case 3. * * 3. 
if the top layer succeeded then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ *ap->a_vpp = NULLVP; /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; goto out; } /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. */ dun->un_flags &= ~UN_ULOCK; VOP_UNLOCK(upperdvp, 0, p); uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; if (uerror) { if (lowervp != NULLVP) { vput(lowervp); lowervp = NULLVP; } error = uerror; goto out; } } } if (lowervp != NULLVP) VOP_UNLOCK(lowervp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); if (error) { if (uppervp != NULLVP) vput(uppervp); if (lowervp != NULLVP) vrele(lowervp); } else { if (*ap->a_vpp != dvp) if (!lockparent || !(cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); #ifdef DIAGNOSTIC if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && *ap->a_vpp != dvp) { panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); } #endif } out: if (cnp->cn_flags & ISDOTDOT) { if (upperdvp != NULL) vrele(upperdvp); if (lowerdvp != NULL) vrele(lowerdvp); } return (error); } static int union_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { - struct union_node *un = VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; struct mount *mp; + int error; - FIXUP(un, p); + FIXUP(dun, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; - mp = ap->a_dvp->v_mount; - vput(ap->a_dvp); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error) + if (error) { + dun->un_flags |= UN_ULOCK; return (error); + } + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(dvp, 0, p); error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); if (error) vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); return (error); } - vput(ap->a_dvp); return (EROFS); } static int union_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (un->un_uppervp == NULLVP) return (EOPNOTSUPP); FIXUP(un, p); return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); } static int union_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { - struct union_node *un = VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; struct mount *mp; + int error; - FIXUP(un, p); + FIXUP(dun, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; - mp = ap->a_dvp->v_mount; - vput(ap->a_dvp); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); - if (error) + if (error) { + dun->un_flags |= UN_ULOCK; return (error); + } if (vp != NULLVP) { + mp = ap->a_dvp->v_mount; + 
VOP_UNLOCK(dvp, 0, p); error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); if (error) vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); + } else { + dun->un_flags |= UN_ULOCK; } return (error); } - vput(ap->a_dvp); return (EROFS); } static int union_open(ap) struct vop_open_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *tvp; int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; int error; /* * If there is an existing upper vp then simply open that. */ tvp = un->un_uppervp; if (tvp == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, * otherwise can simply open the lower vnode. */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); if (error == 0) error = VOP_OPEN(un->un_uppervp, mode, cred, p); return (error); } /* * Just open the lower vnode */ un->un_openl++; vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(tvp, mode, cred, p); VOP_UNLOCK(tvp, 0, p); return (error); } FIXUP(un, p); error = VOP_OPEN(tvp, mode, cred, p); return (error); } static int union_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp; if ((vp = un->un_uppervp) == NULLVP) { #ifdef UNION_DIAGNOSTIC if (un->un_openl <= 0) panic("union: un_openl cnt"); #endif --un->un_openl; vp = un->un_lowervp; } ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } /* * Check access permission on the union vnode. * The access check being enforced is to check * against both the underlying vnode, and any * copied vnode. This ensures that no additional * file permissions are given away simply because * the user caused an implicit file copy. */ static int union_access(ap) struct vop_access_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; int error = EACCES; struct vnode *vp; struct vnode *savedvp; /* * Disallow write attempts on filesystems mounted read-only. 
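 * Only VREG, VDIR and VLNK vnodes are rejected with EROFS here;
 * other vnode types fall through to the normal permission checks
 * below.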
*/ if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } if ((vp = un->un_uppervp) != NULLVP) { FIXUP(un, p); ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_access), ap)); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); savedvp = ap->a_vp; ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; error = VCALL(vp, VOFFSET(vop_access), ap); } } VOP_UNLOCK(vp, 0, p); if (error) return (error); } return (error); } /* * We handle getattr only to change the fsid and * track object sizes */ static int union_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { int error; struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp = un->un_uppervp; struct proc *p = ap->a_p; struct vattr *vap; struct vattr va; /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. * This means the link count on directories needs to be "correct". * The only way to do that is to call getattr on both layers * and fix up the link count. The link count will not necessarily * be accurate but will be large enough to defeat the tree walkers. */ vap = ap->a_vap; vp = un->un_uppervp; if (vp != NULLVP) { /* * It's not clear whether VOP_GETATTR is to be * called with the vnode locked or not. stat() calls * it with (vp) locked, and fstat calls it with * (vp) unlocked. * In the mean time, compensate here by checking * the union_node's lock flag. */ if (un->un_flags & UN_LOCKED) FIXUP(un, p); error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); union_newsize(ap->a_vp, vap->va_size, VNOVAL); } if (vp == NULLVP) { vp = un->un_lowervp; } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) { vp = un->un_lowervp; vap = &va; } else { vp = NULLVP; } if (vp != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; return (0); } static int union_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; struct vattr *vap = ap->a_vap; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) return (EROFS); /* * Handle case of truncating lower object to zero size, * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT. */ if ((un->un_uppervp == NULLVP) && /* assert(un->un_lowervp != NULLVP) */ (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), ap->a_cred, ap->a_p); if (error) return (error); } /* * Try to set attributes in upper layer, * otherwise return read-only filesystem error. 
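 * If the upper layer accepts a size change, union_newsize() is
 * called afterwards so the vm object stays in sync with the new
 * upper layer length.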
*/ if (un->un_uppervp != NULLVP) { FIXUP(un, p); error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); } else { error = EROFS; } return (error); } static int union_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error; struct proc *p = ap->a_uio->uio_procp; struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); if (dolock) VOP_UNLOCK(vp, 0, p); /* * XXX * perhaps the size of the underlying object has changed under * our feet. take advantage of the offset information present * in the uio structure. */ if (error == 0) { struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; if (vp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { if (cur > un->un_lowersz) union_newsize(ap->a_vp, VNOVAL, cur); } } return (error); } static int union_write(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error; struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; vp = UPPERVP(ap->a_vp); if (vp == NULLVP) panic("union: missing upper layer in write"); FIXUP(un, p); error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * the size of the underlying object may be changed by the * write. */ if (error == 0) { off_t cur = ap->a_uio->uio_offset; if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } return (error); } static int union_lease(ap) struct vop_lease_args /* { struct vnode *a_vp; struct proc *a_p; struct ucred *a_cred; int a_flag; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); } static int union_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); } static int union_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); } static int union_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; if (UPPERVP(vp)) VOP_REVOKE(UPPERVP(vp), ap->a_flags); if (LOWERVP(vp)) VOP_REVOKE(LOWERVP(vp), ap->a_flags); vgone(vp); return (0); } static int union_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_mmap), ap)); } static int union_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { int error = 0; struct proc *p = ap->a_p; struct vnode *targetvp = OTHERVP(ap->a_vp); struct union_node *un; if (targetvp != NULLVP) { int dolock = (targetvp == LOWERVP(ap->a_vp)); un = VTOUNION(ap->a_vp); if (dolock) vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); else { un = VTOUNION(ap->a_vp); if ((un->un_flags & 
UN_ULOCK) == 0 && targetvp->v_data != NULL && ((struct lock *)targetvp->v_data)->lk_lockholder == curproc->p_pid && VOP_ISLOCKED(targetvp) != 0) return 0; /* XXX */ FIXUP(un, p); } error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); if (dolock) VOP_UNLOCK(targetvp, 0, p); } return (error); } static int union_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + int error; if (dun->un_uppervp == NULLVP) panic("union remove: null upper vnode"); if (un->un_uppervp != NULLVP) { struct vnode *dvp = dun->un_uppervp; struct vnode *vp = un->un_uppervp; FIXUP(dun, p); - VREF(dvp); dun->un_flags |= UN_KLOCK; - vput(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p); FIXUP(un, p); - VREF(vp); un->un_flags |= UN_KLOCK; - vput(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); - if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc)) + if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; error = VOP_REMOVE(dvp, vp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif + dun->un_flags |= UN_ULOCK; + un->un_flags |= UN_ULOCK; } else { FIXUP(dun, p); error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); - vput(ap->a_dvp); - vput(ap->a_vp); } return (error); } static int union_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error = 0; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; - struct union_node *un; + struct union_node *dun = VTOUNION(ap->a_tdvp); struct vnode *vp; struct vnode *tdvp; + int error = 0; - un = VTOUNION(ap->a_tdvp); if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); if (tun->un_uppervp == NULLVP) { vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); - if (un->un_uppervp == tun->un_dirvp) { - un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); + if (dun->un_uppervp == tun->un_dirvp) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); } error = union_copyup(tun, 1, cnp->cn_cred, p); - if (un->un_uppervp == tun->un_dirvp) { - vn_lock(un->un_uppervp, + if (dun->un_uppervp == tun->un_dirvp) { + vn_lock(dun->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; + dun->un_flags |= UN_ULOCK; } VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = un->un_uppervp; + tdvp = dun->un_uppervp; if (tdvp == NULLVP) error = EROFS; - if (error) { - vput(ap->a_tdvp); + if (error) return (error); - } - FIXUP(un, p); - VREF(tdvp); - un->un_flags |= UN_KLOCK; - vput(ap->a_tdvp); + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); - return (VOP_LINK(tdvp, vp, cnp)); + error = VOP_LINK(tdvp, vp, cnp); + + dun->un_flags |= UN_ULOCK; + + return (error); } static int union_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { int error; struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { /* * this should never happen in normal * 
operation but might if there was * a problem creating the top-level shadow * directory. */ error = EXDEV; goto bad; } fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); if (un->un_uppervp == NULLVP) { /* XXX: should do a copyup */ error = EXDEV; goto bad; } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); if (un->un_uppervp == NULLVP) { /* * this should never happen in normal * operation but might if there was * a problem creating the top-level shadow * directory. */ error = EXDEV; goto bad; } tdvp = un->un_uppervp; VREF(tdvp); un->un_flags |= UN_KLOCK; vput(ap->a_tdvp); } if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); tvp = un->un_uppervp; if (tvp != NULLVP) { VREF(tvp); un->un_flags |= UN_KLOCK; } vput(ap->a_tvp); } return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); bad: vrele(fdvp); vrele(fvp); vput(tdvp); if (tvp != NULLVP) vput(tvp); return (error); } static int union_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { - struct union_node *un = VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; + int error; - FIXUP(un, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); if (error) { - vrele(ap->a_dvp); + dun->un_flags |= UN_ULOCK; return (error); } + VOP_UNLOCK(dvp, 0, p); error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); - vrele(ap->a_dvp); if (error) vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); + return (error); } - vput(ap->a_dvp); return (EROFS); } static int union_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + int error; if (dun->un_uppervp == NULLVP) panic("union rmdir: null upper vnode"); if (un->un_uppervp != NULLVP) { struct vnode *dvp = dun->un_uppervp; struct vnode *vp = un->un_uppervp; FIXUP(dun, p); - VREF(dvp); dun->un_flags |= UN_KLOCK; - vput(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p); FIXUP(un, p); - VREF(vp); un->un_flags |= UN_KLOCK; - vput(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); - if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc)) + if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; error = VOP_RMDIR(dvp, vp, ap->a_cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif + dun->un_flags |= UN_ULOCK; + un->un_flags |= UN_ULOCK; } else { FIXUP(dun, p); error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); - vput(ap->a_dvp); - vput(ap->a_vp); } return (error); } static int union_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { - struct union_node *un = 
VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; + int error; - FIXUP(un, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; - vput(ap->a_dvp); + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); + dun->un_flags |= UN_ULOCK; *ap->a_vpp = NULLVP; return (error); } - vput(ap->a_dvp); return (EROFS); } /* * union_readdir works in concert with getdirentries and * readdir(3) to provide a list of entries in the unioned * directories. getdirentries is responsible for walking * down the union stack. readdir(3) is responsible for * eliminating duplicate names from the returned data stream. */ static int union_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; u_long *a_cookies; int a_ncookies; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *uvp = un->un_uppervp; struct proc *p = ap->a_uio->uio_procp; if (uvp == NULLVP) return (0); FIXUP(un, p); ap->a_vp = uvp; return (VCALL(uvp, VOFFSET(vop_readdir), ap)); } static int union_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { int error; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); if (dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { int error; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); int islocked = un->un_flags & UN_LOCKED; int dolock = (vp == LOWERVP(ap->a_dvp)); if (islocked) { if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_dvp), p); } ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); if (islocked && dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; struct union_node *un = VTOUNION(vp); struct vnode **vpp; /* * Do nothing (and _don't_ bypass). * Wait to vrele lowervp until reclaim, * so that until then our union_node is in the * cache and reusable. * * NEEDSWORK: Someday, consider inactive'ing * the lowervp and then trying to reactivate it * with capabilities (v_id) * like they do in the name lookup cache code. * That's too much work for now. */ if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); free(un->un_dircache, M_TEMP); un->un_dircache = 0; } VOP_UNLOCK(vp, 0, p); if ((un->un_flags & UN_CACHED) == 0) vgone(vp); return (0); } static int union_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { union_freevp(ap->a_vp); return (0); } static int union_lock(ap) struct vop_lock_args *ap; { struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; int flags = ap->a_flags; struct union_node *un; int error; vop_nolock(ap); /* * Need to do real lockmgr-style locking here. 
* in the mean time, draining won't work quite right, * which could lead to a few race conditions. * the following test was here, but is not quite right, we * still need to take the lock: if ((flags & LK_TYPE_MASK) == LK_DRAIN) return (0); */ flags &= ~LK_INTERLOCK; start: un = VTOUNION(vp); if (un->un_uppervp != NULLVP) { if (((un->un_flags & UN_ULOCK) == 0) && (vp->v_usecount != 0)) { error = vn_lock(un->un_uppervp, flags, p); if (error) return (error); un->un_flags |= UN_ULOCK; } #ifdef DIAGNOSTIC if (un->un_flags & UN_KLOCK) { vprint("dangling upper lock", vp); panic("union: dangling upper lock"); } #endif } if (un->un_flags & UN_LOCKED) { #ifdef DIAGNOSTIC if (curproc && un->un_pid == curproc->p_pid && un->un_pid > -1 && curproc->p_pid > -1) panic("union: locking against myself"); #endif un->un_flags |= UN_WANT; tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); goto start; } #ifdef DIAGNOSTIC if (curproc) un->un_pid = curproc->p_pid; else un->un_pid = -1; #endif un->un_flags |= UN_LOCKED; return (0); } /* * When operations want to vput() a union node yet retain a lock on * the upper vnode (say, to do some further operations like link(), * mkdir(), ...), they set UN_KLOCK on the union node, then call * vput() which calls VOP_UNLOCK() and comes here. union_unlock() * unlocks the union node (leaving the upper vnode alone), clears the * KLOCK flag, and then returns to vput(). The caller then does whatever * is left to do with the upper vnode, and ensures that it gets unlocked. * * If UN_KLOCK isn't set, then the upper vnode is unlocked here. */ static int union_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; int a_flags; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; #ifdef DIAGNOSTIC if ((un->un_flags & UN_LOCKED) == 0) panic("union: unlock unlocked node"); if (curproc && un->un_pid != curproc->p_pid && curproc->p_pid > -1 && un->un_pid > -1) panic("union: unlocking other process's union node"); #endif un->un_flags &= ~UN_LOCKED; if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) VOP_UNLOCK(un->un_uppervp, 0, p); un->un_flags &= ~(UN_ULOCK|UN_KLOCK); if (un->un_flags & UN_WANT) { un->un_flags &= ~UN_WANT; wakeup((caddr_t) &un->un_flags); } #ifdef DIAGNOSTIC un->un_pid = 0; #endif vop_nounlock(ap); return (0); } static int union_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { int error; struct proc *p = curproc; /* XXX */ struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_bmap), ap); if (dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n", vp, UPPERVP(vp), LOWERVP(vp)); if (UPPERVP(vp) != NULLVP) vprint("union: upper", UPPERVP(vp)); if (LOWERVP(vp) != NULLVP) vprint("union: lower", LOWERVP(vp)); return (0); } static int union_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 
1 : 0); } static int union_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { int error; struct proc *p = curproc; /* XXX */ struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_pathconf), ap); if (dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_advlock), ap)); } /* * XXX - vop_strategy must be hand coded because it has no * vnode in its arguments. * This goes away with a merged VM/buffer cache. */ static int union_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; int error; struct vnode *savedvp; savedvp = bp->b_vp; bp->b_vp = OTHERVP(bp->b_vp); #ifdef DIAGNOSTIC if (bp->b_vp == NULLVP) panic("union_strategy: nil vp"); if (((bp->b_flags & B_READ) == 0) && (bp->b_vp == LOWERVP(savedvp))) panic("union_strategy: writing to lowervp"); #endif error = VOP_STRATEGY(bp); bp->b_vp = savedvp; return (error); } /* * Global vfs data structures */ vop_t **union_vnodeop_p; static struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) union_abortop }, { &vop_access_desc, (vop_t *) union_access }, { &vop_advlock_desc, (vop_t *) union_advlock }, { &vop_bmap_desc, (vop_t *) union_bmap }, { &vop_close_desc, (vop_t *) union_close }, { &vop_create_desc, (vop_t *) union_create }, { &vop_fsync_desc, (vop_t *) union_fsync }, { &vop_getattr_desc, (vop_t *) union_getattr }, { &vop_inactive_desc, (vop_t *) union_inactive }, { &vop_ioctl_desc, (vop_t *) union_ioctl }, { &vop_islocked_desc, (vop_t *) union_islocked }, { &vop_lease_desc, (vop_t *) union_lease }, { &vop_link_desc, (vop_t *) union_link }, { &vop_lock_desc, (vop_t *) union_lock }, { &vop_lookup_desc, (vop_t *) union_lookup }, { &vop_mkdir_desc, (vop_t *) union_mkdir }, { &vop_mknod_desc, (vop_t *) union_mknod }, { &vop_mmap_desc, (vop_t *) union_mmap }, { &vop_open_desc, (vop_t *) union_open }, { &vop_pathconf_desc, (vop_t *) union_pathconf }, { &vop_poll_desc, (vop_t *) union_poll }, { &vop_print_desc, (vop_t *) union_print }, { &vop_read_desc, (vop_t *) union_read }, { &vop_readdir_desc, (vop_t *) union_readdir }, { &vop_readlink_desc, (vop_t *) union_readlink }, { &vop_reclaim_desc, (vop_t *) union_reclaim }, { &vop_remove_desc, (vop_t *) union_remove }, { &vop_rename_desc, (vop_t *) union_rename }, { &vop_revoke_desc, (vop_t *) union_revoke }, { &vop_rmdir_desc, (vop_t *) union_rmdir }, { &vop_setattr_desc, (vop_t *) union_setattr }, { &vop_strategy_desc, (vop_t *) union_strategy }, { &vop_symlink_desc, (vop_t *) union_symlink }, { &vop_unlock_desc, (vop_t *) union_unlock }, { &vop_whiteout_desc, (vop_t *) union_whiteout }, { &vop_write_desc, (vop_t *) union_write }, { NULL, NULL } }; static struct vnodeopv_desc union_vnodeop_opv_desc = { &union_vnodeop_p, union_vnodeop_entries }; VNODEOP_SET(union_vnodeop_opv_desc); Index: head/sys/gnu/ext2fs/ext2_vnops.c =================================================================== --- head/sys/gnu/ext2fs/ext2_vnops.c (revision 35822) +++ head/sys/gnu/ext2fs/ext2_vnops.c (revision 35823) @@ -1,1234 +1,1225 @@ /* * modified for EXT2FS 
support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 * @(#)ext2_vnops.c 8.7 (Berkeley) 2/3/94 */ #include "opt_quota.h" #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext2_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); static int ext2_fsync __P((struct vop_fsync_args *)); static int ext2_read __P((struct vop_read_args *)); static int ext2_write __P((struct vop_write_args *)); static int ext2_remove __P((struct vop_remove_args *)); static int ext2_link __P((struct vop_link_args *)); static int ext2_rename __P((struct vop_rename_args *)); static int ext2_mkdir __P((struct vop_mkdir_args *)); static int ext2_rmdir __P((struct vop_rmdir_args *)); static int ext2_create __P((struct vop_create_args *)); static int ext2_mknod __P((struct vop_mknod_args *)); static int ext2_symlink __P((struct vop_symlink_args *)); static int ext2_getpages __P((struct vop_getpages_args *)); static int ext2_putpages __P((struct vop_putpages_args *)); /* Global vfs data structures for ufs. 
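 *
 * A note on the tables below: each ext2_vnodeop_entries[] row overrides one
 * vnode operation; anything ext2 does not list falls through the
 * vop_default_desc entry to the generic ufs handler, roughly:
 *
 *	VOP_GETATTR(vp, ...)  ->  no ext2 entry  ->  ufs_vnoperate()  ->  ufs getattr code
 *
 * (illustrative sketch only; the actual dispatch goes through the vop_t
 * vectors registered with VNODEOP_SET below)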
*/ vop_t **ext2_vnodeop_p; static struct vnodeopv_entry_desc ext2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperate }, { &vop_cachedlookup_desc, (vop_t *) ext2_lookup }, { &vop_fsync_desc, (vop_t *) ext2_fsync }, { &vop_inactive_desc, (vop_t *) ext2_inactive }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_read_desc, (vop_t *) ext2_read }, { &vop_readdir_desc, (vop_t *) ext2_readdir }, { &vop_reallocblks_desc, (vop_t *) ext2_reallocblks }, { &vop_write_desc, (vop_t *) ext2_write }, { &vop_remove_desc, (vop_t *) ext2_remove }, { &vop_link_desc, (vop_t *) ext2_link }, { &vop_rename_desc, (vop_t *) ext2_rename }, { &vop_mkdir_desc, (vop_t *) ext2_mkdir }, { &vop_rmdir_desc, (vop_t *) ext2_rmdir }, { &vop_create_desc, (vop_t *) ext2_create }, { &vop_mknod_desc, (vop_t *) ext2_mknod }, { &vop_symlink_desc, (vop_t *) ext2_symlink }, { &vop_getpages_desc, (vop_t *) ext2_getpages }, { &vop_putpages_desc, (vop_t *) ext2_putpages }, { NULL, NULL } }; static struct vnodeopv_desc ext2fs_vnodeop_opv_desc = { &ext2_vnodeop_p, ext2_vnodeop_entries }; vop_t **ext2_specop_p; static struct vnodeopv_entry_desc ext2_specop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatespec }, { &vop_fsync_desc, (vop_t *) ext2_fsync }, { &vop_inactive_desc, (vop_t *) ext2_inactive }, { NULL, NULL } }; static struct vnodeopv_desc ext2fs_specop_opv_desc = { &ext2_specop_p, ext2_specop_entries }; vop_t **ext2_fifoop_p; static struct vnodeopv_entry_desc ext2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatefifo }, { &vop_fsync_desc, (vop_t *) ext2_fsync }, { &vop_inactive_desc, (vop_t *) ext2_inactive }, { NULL, NULL } }; static struct vnodeopv_desc ext2fs_fifoop_opv_desc = { &ext2_fifoop_p, ext2_fifoop_entries }; VNODEOP_SET(ext2fs_vnodeop_opv_desc); VNODEOP_SET(ext2fs_specop_opv_desc); VNODEOP_SET(ext2fs_fifoop_opv_desc); #include /* * Create a regular file */ static int ext2_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); return (0); } /* * Synch an open file. */ /* ARGSUSED */ static int ext2_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct timeval tv; struct buf *nbp; int s; /* * XXX why is all this fs specific? */ /* * Flush all dirty buffers associated with a vnode. */ ext2_discard_prealloc(VTOI(vp)); loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ext2_fsync: not dirty"); bremfree(bp); bp->b_flags |= B_BUSY; splx(s); /* * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. 
*/ if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) (void) bawrite(bp); else (void) bwrite(bp); goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "extfsn", 0); } #if DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("ext2_fsync: dirty", vp); goto loop; } #endif } splx(s); getmicrotime(&tv); return (UFS_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); } /* * Mknod vnode call */ /* ARGSUSED */ static int ext2_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; int error; error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in * the inode cache. */ vput(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); *vpp = 0; return (0); } static int ext2_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } error = ext2_dirremove(dvp, ap->a_cnp); if (error == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } out: - if (dvp == vp) - vrele(vp); - else - vput(vp); - vput(dvp); return (error); } /* * link vnode call */ static int ext2_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct timeval tv; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (tdvp->v_mount != vp->v_mount) { VOP_ABORTOP(tdvp, cnp); error = EXDEV; goto out2; } if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(tdvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } if (ip->i_flags & (IMMUTABLE | APPEND)) { VOP_ABORTOP(tdvp, cnp); error = EPERM; goto out1; } ip->i_nlink++; ip->i_flag |= IN_CHANGE; getmicrotime(&tv); error = UFS_UPDATE(vp, &tv, &tv, 1); if (!error) error = ext2_direnter(ip, tdvp, cnp); if (error) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } zfree(namei_zone, cnp->cn_pnbuf); out1: if (tdvp != vp) VOP_UNLOCK(vp, 0, p); out2: - vput(tdvp); return (error); } /* * Rename system call. 
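 *
 * (On the vput()/vrele() calls removed from ext2_remove() and ext2_link()
 * above: releasing the directory and target vnodes appears to move to the
 * caller of VOP_REMOVE()/VOP_LINK() with this change.  A hypothetical
 * caller would now finish roughly like:
 *
 *	error = VOP_REMOVE(dvp, vp, cnp);
 *	if (dvp == vp)
 *		vrele(vp);
 *	else
 *		vput(vp);
 *	vput(dvp);
 *
 * which mirrors the "XXX - temporarily preserve previous behavior" hunk
 * that ext2_rename() gains below.)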
* See comments in sys/ufs/ufs/ufs_vnops.c */ static int ext2_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; register struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; struct dirtemplate dirbuf; struct timeval tv; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. */ #ifdef UFS_RENAME_DEBUG printf("ufs_rename: fvp == tvp for directories\n"); #endif error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * Relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. */ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp == NULL) { #ifdef UFS_RENAME_DEBUG printf("ufs_rename: from name disappeared\n"); #endif return (ENOENT); } - return (VOP_REMOVE(fdvp, fvp, fcnp)); + error = VOP_REMOVE(fdvp, fvp, fcnp); + /* XXX - temporarily preserve previous behavior */ + if (fdvp == fvp) + vrele(fdvp); + else + vput(fdvp); + if (fvp != NULLVP) + vput(fvp); + return (error); } if (error = vn_lock(fvp, LK_EXCLUSIVE, p)) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. 
*/ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_nlink++; ip->i_flag |= IN_CHANGE; getmicrotime(&tv); if (error = UFS_UPDATE(fvp, &tv, &tv, 1)) { VOP_UNLOCK(fvp, 0, p); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ext2_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. */ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } dp->i_nlink++; dp->i_flag |= IN_CHANGE; error = UFS_UPDATE(tdvp, &tv, &tv, 1); if (error) goto bad; } error = ext2_direnter(ip, tdvp, tcnp); if (error) { if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, &tv, &tv, 1); } goto bad; } vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || xp->i_nlink > 2) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = ext2_dirrewrite(dp, ip, tcnp); if (error) goto bad; /* * If the target directory is in the same * directory as the source directory, * decrement the link count on the parent * of the target directory. */ if (doingdirectory && !newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; } vput(tdvp); /* * Adjust the link count of the target to * reflect the dirrewrite above. 
If this is * a directory it is empty and there are * no links to it, so we can squash the inode and * any space associated with it. We disallowed * renaming over top of a directory with links to * it above, as the remaining link would point to * a directory without "." or ".." entries. */ xp->i_nlink--; if (doingdirectory) { if (--xp->i_nlink != 0) panic("ufs_rename: linked directory"); error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, tcnp->cn_cred, tcnp->cn_proc); } xp->i_flag |= IN_CHANGE; vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; its link * count of three would cause a rmdir to fail with ENOTEMPTY. * The IN_RENAME flag ensures that it cannot be moved by another * rename. */ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, tcnp->cn_cred, (int *)0, (struct proc *)0); if (error == 0) { namlen = ((struct odirtemplate *) &dirbuf)->dotdot_namlen; if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { ufs_dirbad(xp, (doff_t)12, "rename: mangled dir"); } else { dirbuf.dotdot_ino = newparent; (void) vn_rdwr(UIO_WRITE, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_SYNC, tcnp->cn_cred, (int *)0, (struct proc *)0); cache_purge(fdvp); } } } error = ext2_dirremove(fdvp, fcnp); if (!error) { xp->i_nlink--; xp->i_flag |= IN_CHANGE; } xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* * A virgin directory (no blushing please). 
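 *
 * The initializer below reads, field by field (field names assumed from
 * the usual struct odirtemplate layout):
 *
 *	dot_ino    = 0			filled in with the new inode number
 *	dot_reclen = 12			size of the "." record
 *	dot_namlen = 1,  dot_name = "."
 *	dotdot_ino = 0			filled in with the parent's inode number
 *	dotdot_reclen = DIRBLKSIZ - 12	"." plus ".." fill one directory block
 *	dotdot_namlen = 2, dotdot_name = ".."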
*/ static struct odirtemplate omastertemplate = { 0, 12, 1, { '.', 0 }, 0, DIRBLKSIZ - 12, 2, { '.', '.', 0 } }; /* * Mkdir system call */ static int ext2_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; struct dirtemplate dirtemplate, *dtp; struct timeval tv; int error, dmode; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ext2_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * if we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif I } else { ip->i_uid = cnp->cn_cred->cr_uid; } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); - vput(dvp); return (error); } #endif } #else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); - vput(dvp); return (error); } #endif #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_nlink = 2; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; getmicrotime(&tv); error = UFS_UPDATE(tvp, &tv, &tv, 1); /* * Bump link count in parent directory * to reflect work done below. Should * be done before reference is created * so reparation is possible if we crash. */ dp->i_nlink++; dp->i_flag |= IN_CHANGE; error = UFS_UPDATE(dvp, &tv, &tv, 1); if (error) goto bad; /* Initialize directory with "." and ".." from static template. 
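 *
 * For ext2 the directory "block" is the filesystem block rather than
 * DEV_BSIZE, which is why DIRBLKSIZ is locally redefined just below.
 * With a 1k filesystem block, for instance, the new directory would come
 * out as (illustrative sketch):
 *
 *	"." record	reclen = 12
 *	".." record	reclen = 1024 - 12 = 1012
 *	i_size		= 1024		one full block, nothing else in it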
*/ dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; /* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE * so let's just redefine it - for this function only */ #undef DIRBLKSIZ #define DIRBLKSIZ VTOI(dvp)->i_e2fs->s_blocksize dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); if (error) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; goto bad; } if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ else { ip->i_size = DIRBLKSIZ; ip->i_flag |= IN_CHANGE; } /* Directory set up, now install its entry in the parent directory. */ error = ext2_direnter(ip, dvp, cnp); if (error) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; } bad: /* * No need to do an explicit VOP_TRUNCATE here, vrele will do this * for us because we set the link count to 0. */ if (error) { ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); } else *ap->a_vpp = tvp; out: zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); #undef DIRBLKSIZ #define DIRBLKSIZ DEV_BSIZE } /* * Rmdir system call. */ static int ext2_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ error = ext2_dirremove(dvp, cnp); if (error) goto out; dp->i_nlink--; dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; + VOP_UNLOCK(dvp, 0, p); /* * Truncate inode. The only stuff left * in the directory is "." and "..". The * "." reference is inconsequential since * we're quashing it. The ".." reference * has already been adjusted above. We've * removed the "." reference and the reference * in the parent directory, but there may be * other hard links so decrement by 2 and * worry about them later. 
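 *
 * Note the locking change visible in the hunk below: instead of vput()ing
 * the parent, rmdir now only drops its lock around the truncate and takes
 * it back before returning, presumably because releasing dvp and vp has
 * become the caller's job:
 *
 *	VOP_UNLOCK(dvp, 0, p);				drop the parent lock
 *	UFS_TRUNCATE(vp, 0, IO_SYNC, cred, p);		shrink the empty directory
 *	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);	relock before returning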
*/ ip->i_nlink -= 2; - error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); + error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, p); cache_purge(ITOV(ip)); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); out: - if (dvp) - vput(dvp); - vput(vp); return (error); } /* * symlink -- make a symbolic link */ static int ext2_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, (struct proc *)0); vput(vp); return (error); } /* * Allocate a new inode. */ static int ext2_makeinode(mode, dvp, vpp, cnp) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; { register struct inode *ip, *pdir; struct timeval tv; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ext2_makeinode: no name"); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) { zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); } ip = VTOI(tvp); ip->i_gid = pdir->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * if we are * not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; mode &= ~07111; #ifdef QUOTA /* * make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif I } else { ip->i_uid = cnp->cn_cred->cr_uid; } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); - vput(dvp); return (error); } #endif } #else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); - vput(dvp); return (error); } #endif #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_nlink = 1; if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && suser(cnp->cn_cred, NULL)) ip->i_mode &= ~ISGID; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Make sure inode goes to disk before directory entry. 
*/ getmicrotime(&tv); error = UFS_UPDATE(tvp, &tv, &tv, 1); if (error) goto bad; error = ext2_direnter(ip, dvp, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); return (error); } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ static int ext2_getpages(ap) struct vop_getpages_args *ap; { return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ static int ext2_putpages(ap) struct vop_putpages_args *ap; { return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals)); } Index: head/sys/gnu/fs/ext2fs/ext2_vnops.c =================================================================== --- head/sys/gnu/fs/ext2fs/ext2_vnops.c (revision 35822) +++ head/sys/gnu/fs/ext2fs/ext2_vnops.c (revision 35823) @@ -1,1234 +1,1225 @@ /* * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 * @(#)ext2_vnops.c 8.7 (Berkeley) 2/3/94 */ #include "opt_quota.h" #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext2_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); static int ext2_fsync __P((struct vop_fsync_args *)); static int ext2_read __P((struct vop_read_args *)); static int ext2_write __P((struct vop_write_args *)); static int ext2_remove __P((struct vop_remove_args *)); static int ext2_link __P((struct vop_link_args *)); static int ext2_rename __P((struct vop_rename_args *)); static int ext2_mkdir __P((struct vop_mkdir_args *)); static int ext2_rmdir __P((struct vop_rmdir_args *)); static int ext2_create __P((struct vop_create_args *)); static int ext2_mknod __P((struct vop_mknod_args *)); static int ext2_symlink __P((struct vop_symlink_args *)); static int ext2_getpages __P((struct vop_getpages_args *)); static int ext2_putpages __P((struct vop_putpages_args *)); /* Global vfs data structures for ufs. */ vop_t **ext2_vnodeop_p; static struct vnodeopv_entry_desc ext2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperate }, { &vop_cachedlookup_desc, (vop_t *) ext2_lookup }, { &vop_fsync_desc, (vop_t *) ext2_fsync }, { &vop_inactive_desc, (vop_t *) ext2_inactive }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_read_desc, (vop_t *) ext2_read }, { &vop_readdir_desc, (vop_t *) ext2_readdir }, { &vop_reallocblks_desc, (vop_t *) ext2_reallocblks }, { &vop_write_desc, (vop_t *) ext2_write }, { &vop_remove_desc, (vop_t *) ext2_remove }, { &vop_link_desc, (vop_t *) ext2_link }, { &vop_rename_desc, (vop_t *) ext2_rename }, { &vop_mkdir_desc, (vop_t *) ext2_mkdir }, { &vop_rmdir_desc, (vop_t *) ext2_rmdir }, { &vop_create_desc, (vop_t *) ext2_create }, { &vop_mknod_desc, (vop_t *) ext2_mknod }, { &vop_symlink_desc, (vop_t *) ext2_symlink }, { &vop_getpages_desc, (vop_t *) ext2_getpages }, { &vop_putpages_desc, (vop_t *) ext2_putpages }, { NULL, NULL } }; static struct vnodeopv_desc ext2fs_vnodeop_opv_desc = { &ext2_vnodeop_p, ext2_vnodeop_entries }; vop_t **ext2_specop_p; static struct vnodeopv_entry_desc ext2_specop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatespec }, { &vop_fsync_desc, (vop_t *) ext2_fsync }, { &vop_inactive_desc, (vop_t *) ext2_inactive }, { NULL, NULL } }; static struct vnodeopv_desc ext2fs_specop_opv_desc = { &ext2_specop_p, ext2_specop_entries }; vop_t **ext2_fifoop_p; static struct vnodeopv_entry_desc ext2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatefifo }, { &vop_fsync_desc, (vop_t *) ext2_fsync }, { &vop_inactive_desc, (vop_t *) ext2_inactive }, { NULL, NULL } }; static struct vnodeopv_desc ext2fs_fifoop_opv_desc = { &ext2_fifoop_p, ext2_fifoop_entries }; VNODEOP_SET(ext2fs_vnodeop_opv_desc); VNODEOP_SET(ext2fs_specop_opv_desc); VNODEOP_SET(ext2fs_fifoop_opv_desc); #include /* * Create a regular file */ static int ext2_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); return (0); } /* * Synch an open file. 
*/ /* ARGSUSED */ static int ext2_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct timeval tv; struct buf *nbp; int s; /* * XXX why is all this fs specific? */ /* * Flush all dirty buffers associated with a vnode. */ ext2_discard_prealloc(VTOI(vp)); loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ext2_fsync: not dirty"); bremfree(bp); bp->b_flags |= B_BUSY; splx(s); /* * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. */ if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) (void) bawrite(bp); else (void) bwrite(bp); goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "extfsn", 0); } #if DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("ext2_fsync: dirty", vp); goto loop; } #endif } splx(s); getmicrotime(&tv); return (UFS_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); } /* * Mknod vnode call */ /* ARGSUSED */ static int ext2_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; int error; error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in * the inode cache. 
*/ vput(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); *vpp = 0; return (0); } static int ext2_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } error = ext2_dirremove(dvp, ap->a_cnp); if (error == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } out: - if (dvp == vp) - vrele(vp); - else - vput(vp); - vput(dvp); return (error); } /* * link vnode call */ static int ext2_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct timeval tv; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (tdvp->v_mount != vp->v_mount) { VOP_ABORTOP(tdvp, cnp); error = EXDEV; goto out2; } if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(tdvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } if (ip->i_flags & (IMMUTABLE | APPEND)) { VOP_ABORTOP(tdvp, cnp); error = EPERM; goto out1; } ip->i_nlink++; ip->i_flag |= IN_CHANGE; getmicrotime(&tv); error = UFS_UPDATE(vp, &tv, &tv, 1); if (!error) error = ext2_direnter(ip, tdvp, cnp); if (error) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; } zfree(namei_zone, cnp->cn_pnbuf); out1: if (tdvp != vp) VOP_UNLOCK(vp, 0, p); out2: - vput(tdvp); return (error); } /* * Rename system call. * See comments in sys/ufs/ufs/ufs_vnops.c */ static int ext2_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; register struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; struct dirtemplate dirbuf; struct timeval tv; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. 
*/ #ifdef UFS_RENAME_DEBUG printf("ufs_rename: fvp == tvp for directories\n"); #endif error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * Relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. */ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp == NULL) { #ifdef UFS_RENAME_DEBUG printf("ufs_rename: from name disappeared\n"); #endif return (ENOENT); } - return (VOP_REMOVE(fdvp, fvp, fcnp)); + error = VOP_REMOVE(fdvp, fvp, fcnp); + /* XXX - temporarily preserve previous behavior */ + if (fdvp == fvp) + vrele(fdvp); + else + vput(fdvp); + if (fvp != NULLVP) + vput(fvp); + return (error); } if (error = vn_lock(fvp, LK_EXCLUSIVE, p)) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_nlink++; ip->i_flag |= IN_CHANGE; getmicrotime(&tv); if (error = UFS_UPDATE(fvp, &tv, &tv, 1)) { VOP_UNLOCK(fvp, 0, p); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ext2_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. 
*/ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } dp->i_nlink++; dp->i_flag |= IN_CHANGE; error = UFS_UPDATE(tdvp, &tv, &tv, 1); if (error) goto bad; } error = ext2_direnter(ip, tdvp, tcnp); if (error) { if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, &tv, &tv, 1); } goto bad; } vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || xp->i_nlink > 2) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = ext2_dirrewrite(dp, ip, tcnp); if (error) goto bad; /* * If the target directory is in the same * directory as the source directory, * decrement the link count on the parent * of the target directory. */ if (doingdirectory && !newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; } vput(tdvp); /* * Adjust the link count of the target to * reflect the dirrewrite above. If this is * a directory it is empty and there are * no links to it, so we can squash the inode and * any space associated with it. We disallowed * renaming over top of a directory with links to * it above, as the remaining link would point to * a directory without "." or ".." entries. */ xp->i_nlink--; if (doingdirectory) { if (--xp->i_nlink != 0) panic("ufs_rename: linked directory"); error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, tcnp->cn_cred, tcnp->cn_proc); } xp->i_flag |= IN_CHANGE; vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; its link * count of three would cause a rmdir to fail with ENOTEMPTY. * The IN_RENAME flag ensures that it cannot be moved by another * rename. */ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. 
*/ if (doingdirectory && newparent) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, tcnp->cn_cred, (int *)0, (struct proc *)0); if (error == 0) { namlen = ((struct odirtemplate *) &dirbuf)->dotdot_namlen; if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { ufs_dirbad(xp, (doff_t)12, "rename: mangled dir"); } else { dirbuf.dotdot_ino = newparent; (void) vn_rdwr(UIO_WRITE, fvp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_SYNC, tcnp->cn_cred, (int *)0, (struct proc *)0); cache_purge(fdvp); } } } error = ext2_dirremove(fdvp, fcnp); if (!error) { xp->i_nlink--; xp->i_flag |= IN_CHANGE; } xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* * A virgin directory (no blushing please). */ static struct odirtemplate omastertemplate = { 0, 12, 1, { '.', 0 }, 0, DIRBLKSIZ - 12, 2, { '.', '.', 0 } }; /* * Mkdir system call */ static int ext2_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; struct dirtemplate dirtemplate, *dtp; struct timeval tv; int error, dmode; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ext2_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * if we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. 
*/ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif I } else { ip->i_uid = cnp->cn_cred->cr_uid; } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); - vput(dvp); return (error); } #endif } #else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); - vput(dvp); return (error); } #endif #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_nlink = 2; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; getmicrotime(&tv); error = UFS_UPDATE(tvp, &tv, &tv, 1); /* * Bump link count in parent directory * to reflect work done below. Should * be done before reference is created * so reparation is possible if we crash. */ dp->i_nlink++; dp->i_flag |= IN_CHANGE; error = UFS_UPDATE(dvp, &tv, &tv, 1); if (error) goto bad; /* Initialize directory with "." and ".." from static template. */ dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; /* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE * so let's just redefine it - for this function only */ #undef DIRBLKSIZ #define DIRBLKSIZ VTOI(dvp)->i_e2fs->s_blocksize dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); if (error) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; goto bad; } if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ else { ip->i_size = DIRBLKSIZ; ip->i_flag |= IN_CHANGE; } /* Directory set up, now install its entry in the parent directory. */ error = ext2_direnter(ip, dvp, cnp); if (error) { dp->i_nlink--; dp->i_flag |= IN_CHANGE; } bad: /* * No need to do an explicit VOP_TRUNCATE here, vrele will do this * for us because we set the link count to 0. */ if (error) { ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); } else *ap->a_vpp = tvp; out: zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); #undef DIRBLKSIZ #define DIRBLKSIZ DEV_BSIZE } /* * Rmdir system call. */ static int ext2_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. 
If we crash in between, the directory * will be reattached to lost+found, */ error = ext2_dirremove(dvp, cnp); if (error) goto out; dp->i_nlink--; dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; + VOP_UNLOCK(dvp, 0, p); /* * Truncate inode. The only stuff left * in the directory is "." and "..". The * "." reference is inconsequential since * we're quashing it. The ".." reference * has already been adjusted above. We've * removed the "." reference and the reference * in the parent directory, but there may be * other hard links so decrement by 2 and * worry about them later. */ ip->i_nlink -= 2; - error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); + error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, p); cache_purge(ITOV(ip)); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); out: - if (dvp) - vput(dvp); - vput(vp); return (error); } /* * symlink -- make a symbolic link */ static int ext2_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, (struct proc *)0); vput(vp); return (error); } /* * Allocate a new inode. */ static int ext2_makeinode(mode, dvp, vpp, cnp) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; { register struct inode *ip, *pdir; struct timeval tv; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ext2_makeinode: no name"); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) { zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); } ip = VTOI(tvp); ip->i_gid = pdir->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * if we are * not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; mode &= ~07111; #ifdef QUOTA /* * make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. 
*/ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif I } else { ip->i_uid = cnp->cn_cred->cr_uid; } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); - vput(dvp); return (error); } #endif } #else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); - vput(dvp); return (error); } #endif #endif ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_nlink = 1; if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && suser(cnp->cn_cred, NULL)) ip->i_mode &= ~ISGID; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Make sure inode goes to disk before directory entry. */ getmicrotime(&tv); error = UFS_UPDATE(tvp, &tv, &tv, 1); if (error) goto bad; error = ext2_direnter(ip, dvp, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); return (error); } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ static int ext2_getpages(ap) struct vop_getpages_args *ap; { return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage)); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ static int ext2_putpages(ap) struct vop_putpages_args *ap; { return (vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals)); } Index: head/sys/kern/uipc_usrreq.c =================================================================== --- head/sys/kern/uipc_usrreq.c (revision 35822) +++ head/sys/kern/uipc_usrreq.c (revision 35823) @@ -1,1066 +1,1068 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 - * $Id: uipc_usrreq.c,v 1.32 1998/02/06 12:13:28 eivind Exp $ + * $Id: uipc_usrreq.c,v 1.33 1998/04/17 22:36:50 des Exp $ */ #include #include #include #include #include #include /* XXX must be before */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Unix communications domain. * * TODO: * SEQPACKET, RDM * rethink name space problems * need a proper out-of-band */ static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; static ino_t unp_ino; /* prototype for fake inode numbers */ static int unp_attach __P((struct socket *)); static void unp_detach __P((struct unpcb *)); static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); static int unp_connect __P((struct socket *,struct sockaddr *, struct proc *)); static void unp_disconnect __P((struct unpcb *)); static void unp_shutdown __P((struct unpcb *)); static void unp_drop __P((struct unpcb *, int)); static void unp_gc __P((void)); static void unp_scan __P((struct mbuf *, void (*)(struct file *))); static void unp_mark __P((struct file *)); static void unp_discard __P((struct file *)); static int unp_internalize __P((struct mbuf *, struct proc *)); static int uipc_abort(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; unp_drop(unp, ECONNABORTED); return 0; } static int uipc_accept(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; /* * Pass back name of connected socket, * if it was bound and we are still connected * (our peer may have closed already!). 
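* If the peer never bound a name, an empty AF_LOCAL sockaddr (sun_noname)
* is handed back below instead of failing the call, so accept() always
* returns a usable, if uninformative, address.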
*/ if (unp->unp_conn && unp->unp_conn->unp_addr) { *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 1); } else { *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); } return 0; } static int uipc_attach(struct socket *so, int proto, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp != 0) return EISCONN; return unp_attach(so); } static int uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; return unp_bind(unp, nam, p); } static int uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; return unp_connect(so, nam, curproc); } static int uipc_connect2(struct socket *so1, struct socket *so2) { struct unpcb *unp = sotounpcb(so1); if (unp == 0) return EINVAL; return unp_connect2(so1, so2); } /* control is EOPNOTSUPP */ static int uipc_detach(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; unp_detach(unp); return 0; } static int uipc_disconnect(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; unp_disconnect(unp); return 0; } static int uipc_listen(struct socket *so, struct proc *p) { struct unpcb *unp = sotounpcb(so); if (unp == 0 || unp->unp_vnode == 0) return EINVAL; return 0; } static int uipc_peeraddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; if (unp->unp_conn && unp->unp_conn->unp_addr) *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 1); return 0; } static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp = sotounpcb(so); struct socket *so2; if (unp == 0) return EINVAL; switch (so->so_type) { case SOCK_DGRAM: panic("uipc_rcvd DGRAM?"); /*NOTREACHED*/ case SOCK_STREAM: #define rcv (&so->so_rcv) #define snd (&so2->so_snd) if (unp->unp_conn == 0) break; so2 = unp->unp_conn->unp_socket; /* * Adjust backpressure on sender * and wakeup any waiting to write. */ snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt; unp->unp_mbcnt = rcv->sb_mbcnt; snd->sb_hiwat += unp->unp_cc - rcv->sb_cc; unp->unp_cc = rcv->sb_cc; sowwakeup(so2); #undef snd #undef rcv break; default: panic("uipc_rcvd unknown socktype"); } return 0; } /* pru_rcvoob is EOPNOTSUPP */ static int uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { int error = 0; struct unpcb *unp = sotounpcb(so); struct socket *so2; if (unp == 0) { error = EINVAL; goto release; } if (flags & PRUS_OOB) { error = EOPNOTSUPP; goto release; } if (control && (error = unp_internalize(control, p))) goto release; switch (so->so_type) { case SOCK_DGRAM: { struct sockaddr *from; if (nam) { if (unp->unp_conn) { error = EISCONN; break; } error = unp_connect(so, nam, p); if (error) break; } else { if (unp->unp_conn == 0) { error = ENOTCONN; break; } } so2 = unp->unp_conn->unp_socket; if (unp->unp_addr) from = (struct sockaddr *)unp->unp_addr; else from = &sun_noname; if (sbappendaddr(&so2->so_rcv, from, m, control)) { sorwakeup(so2); m = 0; control = 0; } else error = ENOBUFS; if (nam) unp_disconnect(unp); break; } case SOCK_STREAM: #define rcv (&so2->so_rcv) #define snd (&so->so_snd) /* Connect if not connected yet. */ /* * Note: A better implementation would complain * if not equal to the peer's address. 
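* In practice this means a sendto(2) with a destination address on a
* not-yet-connected SOCK_STREAM socket performs the connect implicitly;
* once connected, any address supplied with later sends is simply
* ignored rather than checked against the peer.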
*/ if ((so->so_state & SS_ISCONNECTED) == 0) { if (nam) { error = unp_connect(so, nam, p); if (error) break; /* XXX */ } else { error = ENOTCONN; break; } } if (so->so_state & SS_CANTSENDMORE) { error = EPIPE; break; } if (unp->unp_conn == 0) panic("uipc_send connected but no connection?"); so2 = unp->unp_conn->unp_socket; /* * Send to paired receive port, and then reduce * send buffer hiwater marks to maintain backpressure. * Wake up readers. */ if (control) { if (sbappendcontrol(rcv, m, control)) control = 0; } else sbappend(rcv, m); snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt; unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt; snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc; unp->unp_conn->unp_cc = rcv->sb_cc; sorwakeup(so2); m = 0; #undef snd #undef rcv break; default: panic("uipc_send unknown socktype"); } /* * SEND_EOF is equivalent to a SEND followed by * a SHUTDOWN. */ if (flags & PRUS_EOF) { socantsendmore(so); unp_shutdown(unp); } release: if (control) m_freem(control); if (m) m_freem(m); return error; } static int uipc_sense(struct socket *so, struct stat *sb) { struct unpcb *unp = sotounpcb(so); struct socket *so2; if (unp == 0) return EINVAL; sb->st_blksize = so->so_snd.sb_hiwat; if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { so2 = unp->unp_conn->unp_socket; sb->st_blksize += so2->so_rcv.sb_cc; } sb->st_dev = NODEV; if (unp->unp_ino == 0) unp->unp_ino = unp_ino++; sb->st_ino = unp->unp_ino; return (0); } static int uipc_shutdown(struct socket *so) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; socantsendmore(so); unp_shutdown(unp); return 0; } static int uipc_sockaddr(struct socket *so, struct sockaddr **nam) { struct unpcb *unp = sotounpcb(so); if (unp == 0) return EINVAL; if (unp->unp_addr) *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); return 0; } struct pr_usrreqs uipc_usrreqs = { uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, sosend, soreceive, sopoll }; /* * Both send and receive buffers are allocated PIPSIZ bytes of buffering * for stream sockets, although the total for sender and receiver is * actually only PIPSIZ. * Datagram sockets really use the sendspace as the maximum datagram size, * and don't really want to reserve the sendspace. Their recvspace should * be large enough for at least one max-size datagram plus address. 
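* All four values can be tuned at run time through the sysctl variables
* declared below (net.local.stream.sendspace, net.local.stream.recvspace,
* net.local.dgram.maxdgram and net.local.dgram.recvspace).  For datagram
* sockets the practical effect of the 2k sendspace is that any single
* datagram larger than that is refused with EMSGSIZE rather than queued.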
*/ #ifndef PIPSIZ #define PIPSIZ 8192 #endif static u_long unpst_sendspace = PIPSIZ; static u_long unpst_recvspace = PIPSIZ; static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ static u_long unpdg_recvspace = 4*1024; static int unp_rights; /* file descriptors in flight */ SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, &unpst_sendspace, 0, ""); SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, &unpst_recvspace, 0, ""); SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, &unpdg_sendspace, 0, ""); SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, &unpdg_recvspace, 0, ""); SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); static int unp_attach(so) struct socket *so; { register struct unpcb *unp; int error; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { switch (so->so_type) { case SOCK_STREAM: error = soreserve(so, unpst_sendspace, unpst_recvspace); break; case SOCK_DGRAM: error = soreserve(so, unpdg_sendspace, unpdg_recvspace); break; default: panic("unp_attach"); } if (error) return (error); } MALLOC(unp, struct unpcb *, sizeof *unp, M_PCB, M_NOWAIT); if (unp == NULL) return (ENOBUFS); bzero(unp, sizeof *unp); so->so_pcb = (caddr_t)unp; unp->unp_socket = so; return (0); } static void unp_detach(unp) register struct unpcb *unp; { if (unp->unp_vnode) { unp->unp_vnode->v_socket = 0; vrele(unp->unp_vnode); unp->unp_vnode = 0; } if (unp->unp_conn) unp_disconnect(unp); while (unp->unp_refs) unp_drop(unp->unp_refs, ECONNRESET); soisdisconnected(unp->unp_socket); unp->unp_socket->so_pcb = 0; if (unp_rights) { /* * Normally the receive buffer is flushed later, * in sofree, but if our receive buffer holds references * to descriptors that are now garbage, we will dispose * of those descriptor references after the garbage collector * gets them (resulting in a "panic: closef: count < 0"). 
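* The order matters: sorflush() below disposes of any rights queued on
* our own receive buffer (via the domain's dispose routine), and only
* then is unp_gc() run to reclaim descriptors that remain reachable
* solely through cycles of in-flight messages.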
*/ sorflush(unp->unp_socket); unp_gc(); } if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); FREE(unp, M_PCB); } static int unp_bind(unp, nam, p) struct unpcb *unp; struct sockaddr *nam; struct proc *p; { struct sockaddr_un *soun = (struct sockaddr_un *)nam; register struct vnode *vp; struct vattr vattr; int error, namelen; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; if (unp->unp_vnode != NULL) return (EINVAL); #define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0)) namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); if (namelen <= 0) return EINVAL; strncpy(buf, soun->sun_path, namelen); buf[namelen] = 0; /* null-terminate the string */ NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ error = namei(&nd); if (error) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EADDRINUSE); } VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)) + error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); + if (error) return (error); vp = nd.ni_vp; vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); VOP_UNLOCK(vp, 0, p); return (0); } static int unp_connect(so, nam, p) struct socket *so; struct sockaddr *nam; struct proc *p; { register struct sockaddr_un *soun = (struct sockaddr_un *)nam; register struct vnode *vp; register struct socket *so2, *so3; struct unpcb *unp2, *unp3; int error, len; struct nameidata nd; char buf[SOCK_MAXADDRLEN]; len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); if (len <= 0) return EINVAL; strncpy(buf, soun->sun_path, len); buf[len] = 0; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); error = namei(&nd); if (error) return (error); vp = nd.ni_vp; if (vp->v_type != VSOCK) { error = ENOTSOCK; goto bad; } error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); if (error) goto bad; so2 = vp->v_socket; if (so2 == 0) { error = ECONNREFUSED; goto bad; } if (so->so_type != so2->so_type) { error = EPROTOTYPE; goto bad; } if (so->so_proto->pr_flags & PR_CONNREQUIRED) { if ((so2->so_options & SO_ACCEPTCONN) == 0 || (so3 = sonewconn(so2, 0)) == 0) { error = ECONNREFUSED; goto bad; } unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) dup_sockaddr((struct sockaddr *) unp2->unp_addr, 1); so2 = so3; } error = unp_connect2(so, so2); bad: vput(vp); return (error); } int unp_connect2(so, so2) register struct socket *so; register struct socket *so2; { register struct unpcb *unp = sotounpcb(so); register struct unpcb *unp2; if (so2->so_type != so->so_type) return (EPROTOTYPE); unp2 = sotounpcb(so2); unp->unp_conn = unp2; switch (so->so_type) { case SOCK_DGRAM: unp->unp_nextref = unp2->unp_refs; unp2->unp_refs = unp; soisconnected(so); break; case SOCK_STREAM: unp2->unp_conn = unp; soisconnected(so); soisconnected(so2); break; default: panic("unp_connect2"); } return (0); } static void unp_disconnect(unp) struct unpcb *unp; { register struct unpcb *unp2 = unp->unp_conn; if (unp2 == 0) return; unp->unp_conn = 0; switch (unp->unp_socket->so_type) { case SOCK_DGRAM: if (unp2->unp_refs == unp) unp2->unp_refs = unp->unp_nextref; else { unp2 = unp2->unp_refs; for (;;) { if (unp2 == 0) 
panic("unp_disconnect"); if (unp2->unp_nextref == unp) break; unp2 = unp2->unp_nextref; } unp2->unp_nextref = unp->unp_nextref; } unp->unp_nextref = 0; unp->unp_socket->so_state &= ~SS_ISCONNECTED; break; case SOCK_STREAM: soisdisconnected(unp->unp_socket); unp2->unp_conn = 0; soisdisconnected(unp2->unp_socket); break; } } #ifdef notdef void unp_abort(unp) struct unpcb *unp; { unp_detach(unp); } #endif static void unp_shutdown(unp) struct unpcb *unp; { struct socket *so; if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && (so = unp->unp_conn->unp_socket)) socantrcvmore(so); } static void unp_drop(unp, errno) struct unpcb *unp; int errno; { struct socket *so = unp->unp_socket; so->so_error = errno; unp_disconnect(unp); if (so->so_head) { so->so_pcb = (caddr_t) 0; if (unp->unp_addr) FREE(unp->unp_addr, M_SONAME); FREE(unp, M_PCB); sofree(so); } } #ifdef notdef void unp_drain() { } #endif int unp_externalize(rights) struct mbuf *rights; { struct proc *p = curproc; /* XXX */ register int i; register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); register struct file **rp = (struct file **)(cm + 1); register struct file *fp; int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int); int f; /* * if the new FD's will not fit, then we free them all */ if (!fdavail(p, newfds)) { for (i = 0; i < newfds; i++) { fp = *rp; unp_discard(fp); *rp++ = 0; } return (EMSGSIZE); } /* * now change each pointer to an fd in the global table to * an integer that is the index to the local fd table entry * that we set up to point to the global one we are transferring. * XXX this assumes a pointer and int are the same size...! */ for (i = 0; i < newfds; i++) { if (fdalloc(p, 0, &f)) panic("unp_externalize"); fp = *rp; p->p_fd->fd_ofiles[f] = fp; fp->f_msgcount--; unp_rights--; *(int *)rp++ = f; } return (0); } #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) #endif static int unp_internalize(control, p) struct mbuf *control; struct proc *p; { struct filedesc *fdp = p->p_fd; register struct cmsghdr *cm = mtod(control, struct cmsghdr *); register struct file **rp; register struct file *fp; register int i, fd; register struct cmsgcred *cmcred; int oldfds; if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) return (EINVAL); /* * Fill in credential information. */ if (cm->cmsg_type == SCM_CREDS) { cmcred = (struct cmsgcred *)(cm + 1); cmcred->cmcred_pid = p->p_pid; cmcred->cmcred_uid = p->p_cred->p_ruid; cmcred->cmcred_gid = p->p_cred->p_rgid; cmcred->cmcred_euid = p->p_ucred->cr_uid; cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, CMGROUP_MAX); for (i = 0; i < cmcred->cmcred_ngroups; i++) cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; return(0); } oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); /* * check that all the FDs passed in refer to legal OPEN files * If not, reject the entire operation. */ rp = (struct file **)(cm + 1); for (i = 0; i < oldfds; i++) { fd = *(int *)rp++; if ((unsigned)fd >= fdp->fd_nfiles || fdp->fd_ofiles[fd] == NULL) return (EBADF); } /* * Now replace the integer FDs with pointers to * the associated global file table entry.. * XXX this assumes a pointer and an int are the same size! 
*/ rp = (struct file **)(cm + 1); for (i = 0; i < oldfds; i++) { fp = fdp->fd_ofiles[*(int *)rp]; *rp++ = fp; fp->f_count++; fp->f_msgcount++; unp_rights++; } return (0); } static int unp_defer, unp_gcing; static void unp_gc() { register struct file *fp, *nextfp; register struct socket *so; struct file **extra_ref, **fpp; int nunref, i; if (unp_gcing) return; unp_gcing = 1; unp_defer = 0; /* * before going through all this, set all FDs to * be NOT defered and NOT externally accessible */ for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) fp->f_flag &= ~(FMARK|FDEFER); do { for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { /* * If the file is not open, skip it */ if (fp->f_count == 0) continue; /* * If we already marked it as 'defer' in a * previous pass, then try process it this time * and un-mark it */ if (fp->f_flag & FDEFER) { fp->f_flag &= ~FDEFER; unp_defer--; } else { /* * if it's not defered, then check if it's * already marked.. if so skip it */ if (fp->f_flag & FMARK) continue; /* * If all references are from messages * in transit, then skip it. it's not * externally accessible. */ if (fp->f_count == fp->f_msgcount) continue; /* * If it got this far then it must be * externally accessible. */ fp->f_flag |= FMARK; } /* * either it was defered, or it is externally * accessible and not already marked so. * Now check if it is possibly one of OUR sockets. */ if (fp->f_type != DTYPE_SOCKET || (so = (struct socket *)fp->f_data) == 0) continue; if (so->so_proto->pr_domain != &localdomain || (so->so_proto->pr_flags&PR_RIGHTS) == 0) continue; #ifdef notdef if (so->so_rcv.sb_flags & SB_LOCK) { /* * This is problematical; it's not clear * we need to wait for the sockbuf to be * unlocked (on a uniprocessor, at least), * and it's also not clear what to do * if sbwait returns an error due to receipt * of a signal. If sbwait does return * an error, we'll go into an infinite * loop. Delete all of this for now. */ (void) sbwait(&so->so_rcv); goto restart; } #endif /* * So, Ok, it's one of our sockets and it IS externally * accessible (or was defered). Now we look * to see if we hold any file descriptors in its * message buffers. Follow those links and mark them * as accessible too. */ unp_scan(so->so_rcv.sb_mb, unp_mark); } } while (unp_defer); /* * We grab an extra reference to each of the file table entries * that are not otherwise accessible and then free the rights * that are stored in messages on them. * * The bug in the orginal code is a little tricky, so I'll describe * what's wrong with it here. * * It is incorrect to simply unp_discard each entry for f_msgcount * times -- consider the case of sockets A and B that contain * references to each other. On a last close of some other socket, * we trigger a gc since the number of outstanding rights (unp_rights) * is non-zero. If during the sweep phase the gc code un_discards, * we end up doing a (full) closef on the descriptor. A closef on A * results in the following chain. Closef calls soo_close, which * calls soclose. Soclose calls first (through the switch * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply * returns because the previous instance had set unp_gcing, and * we return all the way back to soclose, which marks the socket * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush * to free up the rights that are queued in messages on the socket A, * i.e., the reference on B. The sorflush calls via the dom_dispose * switch unp_dispose, which unp_scans with unp_discard. 
This second * instance of unp_discard just calls closef on B. * * Well, a similar chain occurs on B, resulting in a sorflush on B, * which results in another closef on A. Unfortunately, A is already * being closed, and the descriptor has already been marked with * SS_NOFDREF, and soclose panics at this point. * * Here, we first take an extra reference to each inaccessible * descriptor. Then, we call sorflush ourself, since we know * it is a Unix domain socket anyhow. After we destroy all the * rights carried in messages, we do a last closef to get rid * of our extra reference. This is the last close, and the * unp_detach etc will shut down the socket. * * 91/09/19, bsy@cs.cmu.edu */ extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0; fp = nextfp) { nextfp = fp->f_list.le_next; /* * If it's not open, skip it */ if (fp->f_count == 0) continue; /* * If all refs are from msgs, and it's not marked accessible * then it must be referenced from some unreachable cycle * of (shut-down) FDs, so include it in our * list of FDs to remove */ if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { *fpp++ = fp; nunref++; fp->f_count++; } } /* * for each FD on our hit list, do the following two things */ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) sorflush((struct socket *)(*fpp)->f_data); for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) closef(*fpp, (struct proc *) NULL); free((caddr_t)extra_ref, M_FILE); unp_gcing = 0; } void unp_dispose(m) struct mbuf *m; { if (m) unp_scan(m, unp_discard); } static void unp_scan(m0, op) register struct mbuf *m0; void (*op) __P((struct file *)); { register struct mbuf *m; register struct file **rp; register struct cmsghdr *cm; register int i; int qfds; while (m0) { for (m = m0; m; m = m->m_next) if (m->m_type == MT_CONTROL && m->m_len >= sizeof(*cm)) { cm = mtod(m, struct cmsghdr *); if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS) continue; qfds = (cm->cmsg_len - sizeof *cm) / sizeof (struct file *); rp = (struct file **)(cm + 1); for (i = 0; i < qfds; i++) (*op)(*rp++); break; /* XXX, but saves time */ } m0 = m0->m_act; } } static void unp_mark(fp) struct file *fp; { if (fp->f_flag & FMARK) return; unp_defer++; fp->f_flag |= (FMARK|FDEFER); } static void unp_discard(fp) struct file *fp; { fp->f_msgcount--; unp_rights--; (void) closef(fp, (struct proc *)NULL); } Index: head/sys/kern/vfs_extattr.c =================================================================== --- head/sys/kern/vfs_extattr.c (revision 35822) +++ head/sys/kern/vfs_extattr.c (revision 35823) @@ -1,2846 +1,2852 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.97 1998/04/08 18:31:57 wosch Exp $ + * $Id: vfs_syscalls.c,v 1.99 1998/04/19 22:20:32 des Exp $ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" /* * XXX - The following is required because of some magic done * in getdirentries() below which is only done if the translucent * filesystem `UNION' is compiled into the kernel. This is broken, * but I don't have time to study the code deeply enough to understand * what's going on and determine an appropriate fix. -GAW */ #include "opt_union.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UNION #include #endif #include #include #include #include #include static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); static int usermount = 0; /* if 1, non-root can mount fs. */ SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ /* * Mount a file system. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(p, uap) struct proc *p; register struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, flag2 = 0; struct vattr va; u_long fstypenum; struct nameidata nd; char fstypename[MFSNAMELEN]; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; flag2 = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. 
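* (MNT_RELOAD re-reads the filesystem's on-disk metadata into the
* in-core copies; that is presumably only safe while no writes can be
* outstanding, hence the read-only requirement.)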
*/ if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (vfs_busy(mp, LK_NOWAIT, 0, p)) { vput(vp); return (EBUSY); } VOP_UNLOCK(vp, 0, p); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || (va.va_uid != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag)))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } #ifdef COMPAT_43 /* * Historically filesystem types were identified by number. If we * get an integer for the filesystem type instead of a string, we * check to see if it matches one of the historic filesystem types. */ fstypenum = (u_long)SCARG(uap, type); if (fstypenum < maxvfsconf) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == fstypenum) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); } else #endif /* COMPAT_43 */ if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | MNT_NOSYMFOLLOW | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOSYMFOLLOW | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
*/ error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { if (mp->mnt_syncer == NULL) error = vfs_allocate_syncvnode(mp); } else { if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); mp->mnt_syncer = NULL; } vfs_unbusy(mp, p); return (error); } /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); if ((mp->mnt_flag & MNT_RDONLY) == 0) error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. */ static void checkdirs(olddp) struct vnode *olddp; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; if (olddp->v_usecount == 1) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; } if (fdp->fd_rdir == olddp) { vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; } } if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } vput(newdp); } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(p, uap) struct proc *p; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), p)); } /* * Do the actual file system unmount. 
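* The sequence below: mark the mount MNTK_UNMOUNT and drain its lock,
* sync out dirty data (read-only mounts skip the sync, and a forced
* unmount proceeds even if the sync fails), let the filesystem do its
* part via VFS_UNMOUNT(), and only then unhook the mount from the mount
* list and from the vnode it covers.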
*/ int dounmount(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { struct vnode *coveredvp; int error; simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); return (error); } CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); } mp->mnt_vfc->vfc_refcount--; if (mp->mnt_vnodelist.lh_first != NULL) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(p, uap) struct proc *p; struct sync_args *uap; { register struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, ((p != NULL) ? p->p_ucred : NOCRED), p); mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(p, uap) struct proc *p; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { register struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), p)); } /* * Get filesystem statistics. 
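* For callers other than root the result is copied to a local buffer
* and the filesystem id is zeroed before the copyout, so unprivileged
* processes never see the real fsid.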
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(p, uap) struct proc *p; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(p, uap) struct proc *p; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(p, uap) struct proc *p; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. */ if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else p->p_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. 
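* If other filesystems are mounted on the directory the descriptor
* refers to, the loop below follows v_mountedhere so that fd_cdir ends
* up at the root of the topmost mount rather than at the covered vnode.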
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(p, uap) struct proc *p; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, p); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(p, uap) struct proc *p; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(p, uap) struct proc *p; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_rdir); fdp->fd_rdir = nd.ni_vp; return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, p) register struct nameidata *ndp; struct proc *p; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) vput(vp); else VOP_UNLOCK(vp, 0, p); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
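* The p_dupfd handshake below exists for fdopen()-style opens of
* descriptors that are already open (e.g. through a mounted fdescfs):
* the device open routine stores the descriptor to duplicate in p_dupfd
* and fails with ENODEV or ENXIO, and dupfdopen() then dups that
* descriptor into the table slot allocated here.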
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(p, uap) struct proc *p; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register struct vnode *vp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(p, &nfp, &indx); if (error) return (error); fp = nfp; cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } if (error == ERESTART) error = EINTR; fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; vp = nd.ni_vp; fp->f_flag = flags & FMASK; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (vp->v_object == NULL)) vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(p, uap) struct proc *p; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(p, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(p, uap) struct proc *p; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; int whiteout; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) { error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); if (error) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); } else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp) vrele(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(p, uap) struct proc *p; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); + return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(p, uap) struct proc *p; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { register struct vnode *vp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); } } vrele(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
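* As with the other creation paths touched by this change, the parent
* directory vnode that namei() returned locked is now released here
* with vput() once VOP_SYMLINK() returns, rather than inside the
* individual filesystem implementations.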
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(p, uap) struct proc *p; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vattr vattr; char *path; int error; struct nameidata nd; path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); + vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(p, uap) struct proc *p; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(p, uap) struct proc *p; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); } + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. 
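* L_XTND seeks are taken relative to the current end of file: the size
* is fetched with VOP_GETATTR() and added to the requested offset, so
* lseek(fd, (off_t)0, L_XTND) leaves the offset at the end of the file.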
*/ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(p, uap) struct proc *p; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vattr vattr; int error; if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); switch (SCARG(uap, whence)) { case L_INCR: fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); } *(off_t *)(p->p_retval) = fp->f_offset; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(p, uap) struct proc *p; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(p, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(p, uap) struct proc *p; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; int error, flags, t_gid, t_uid; struct nameidata nd; t_uid = cred->cr_uid; t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ if (SCARG(uap, flags)) { flags = 0; if (SCARG(uap, flags) & R_OK) flags |= VREAD; if (SCARG(uap, flags) & W_OK) flags |= VWRITE; if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); } vput(vp); out1: cred->cr_uid = t_uid; cred->cr_groups[0] = t_gid; return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(p, uap) struct proc *p; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. 
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(p, uap) struct proc *p; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(p, uap) struct proc *p; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(p, uap) struct proc *p; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(p, uap) struct proc *p; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. 
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(p, uap) struct proc *p; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(p, uap) struct proc *p; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(p, uap) struct proc *p; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(p, uap) struct proc *p; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change mode of a file given a file descriptor. 
*/ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(p, uap) struct proc *p; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(p, uap) struct proc *p; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(p, uap) struct proc *p; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(p, uap) struct proc *p; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set the access and modification times of a file. 
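The chown(2), lchown(2) and fchown(2) handlers above differ only in how the vnode is located: by path, by path without following a trailing symlink, or by file descriptor. A minimal user-space sketch of the symlink-preserving variant follows; the path and numeric IDs are placeholders.

        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                /* Placeholder path and IDs; lchown(2) affects the link itself, not its target. */
                if (lchown("/tmp/example_link", 1001, 1001) == -1)
                        perror("lchown");
                return (0);
        }
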
*/ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(p, uap) struct proc *p; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { register struct vnode *vp; struct timeval tv[2]; struct vattr vattr; int error; struct nameidata nd; VATTR_NULL(&vattr); if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, sizeof (tv))) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(p, uap) struct proc *p; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(p, uap) struct proc *p; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(p, uap) struct proc *p; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap)); } /* * Truncate a file given a file descriptor. 
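Both truncate paths above reject negative lengths with EINVAL before doing any lookup. A minimal user-space sketch; the path is a placeholder.

        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                /* Placeholder path; negative lengths are rejected by the kernel with EINVAL. */
                if (truncate("/tmp/example.dat", 0) == -1)
                        perror("truncate");
                return (0);
        }
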
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(p, uap) struct proc *p; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(p, uap) struct proc *p; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { register struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) == NULL) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, FALSE); } if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) { error = VOP_FSYNC(vp, fp->f_cred, MNT_LAZY, p); } else { error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) ? MNT_NOWAIT : MNT_WAIT, p); } VOP_UNLOCK(vp, 0, p); if ((vp->v_mount->mnt_flag & MNT_SOFTDEP) && bioops.io_sync) (*bioops.io_sync)(NULL); } return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(p, uap) struct proc *p; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), p); if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(p, uap) struct proc *p; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(p, uap) struct proc *p; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); } + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. 
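The mkdir() handler above returns EEXIST itself when the lookup finds an existing entry, and rmdir() refuses to remove ".", the root of a mounted filesystem, or a non-directory. A minimal user-space sketch exercising those entry points; the path is a placeholder.

        #include <sys/stat.h>
        #include <errno.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                /* Placeholder path; EEXIST is tolerated so the sketch can be re-run. */
                if (mkdir("/tmp/example_dir", 0755) == -1 && errno != EEXIST) {
                        perror("mkdir");
                        return (1);
                }
                if (rmdir("/tmp/example_dir") == -1)
                        perror("rmdir");        /* e.g. ENOTEMPTY if it is not empty */
                return (0);
        }
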
*/ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(p, uap) struct proc *p; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
*/ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(p, uap) struct proc *p; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(p, uap) struct proc *p; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; fdp = p->p_fd; p->p_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(p, uap) struct proc *p; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. 
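umask() above simply swaps the per-process file creation mask stored in fd_cmask and returns the old value; it has no error case. A minimal user-space sketch.

        #include <sys/stat.h>
        #include <stdio.h>

        int
        main(void)
        {
                mode_t old;

                /* The call returns the previous mask and cannot fail. */
                old = umask(022);
                printf("previous mask: %04o\n", (unsigned int)old);
                (void)umask(old);               /* restore */
                return (0);
        }
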
*/ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { struct file *fp; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) return (EINVAL); *fpp = fp; return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif #define STATNODE(mode, name, var) \ SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); int __getcwd(p, uap) struct proc *p; struct __getcwd_args *uap; { char *bp, *buf; int error, i, slash_prefixed; struct filedesc *fdp; struct namecache *ncp; struct vnode *vp; numcwdcalls++; if (disablecwd) return (ENODEV); if (uap->buflen < 2) return (EINVAL); if (uap->buflen > MAXPATHLEN) uap->buflen = MAXPATHLEN; buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); bp += uap->buflen - 1; *bp = '\0'; fdp = p->p_fd; slash_prefixed = 0; for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd->v_id != vp->v_ddid) { numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = vp->v_dd; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } numcwdfound++; error = copyout(bp, uap->buf, strlen(bp) + 1); free(buf, M_TEMP); return (error); } Index: head/sys/kern/vfs_syscalls.c =================================================================== --- head/sys/kern/vfs_syscalls.c (revision 35822) +++ head/sys/kern/vfs_syscalls.c (revision 35823) @@ -1,2846 +1,2852 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.97 1998/04/08 18:31:57 wosch Exp $ + * $Id: vfs_syscalls.c,v 1.99 1998/04/19 22:20:32 des Exp $ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" /* * XXX - The following is required because of some magic done * in getdirentries() below which is only done if the translucent * filesystem `UNION' is compiled into the kernel. This is broken, * but I don't have time to study the code deeply enough to understand * what's going on and determine an appropriate fix. -GAW */ #include "opt_union.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UNION #include #endif #include #include #include #include #include static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); static int usermount = 0; /* if 1, non-root can mount fs. */ SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ /* * Mount a file system. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(p, uap) struct proc *p; register struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, flag2 = 0; struct vattr va; u_long fstypenum; struct nameidata nd; char fstypename[MFSNAMELEN]; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; flag2 = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. 
*/ if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (vfs_busy(mp, LK_NOWAIT, 0, p)) { vput(vp); return (EBUSY); } VOP_UNLOCK(vp, 0, p); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || (va.va_uid != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag)))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } #ifdef COMPAT_43 /* * Historically filesystem types were identified by number. If we * get an integer for the filesystem type instead of a string, we * check to see if it matches one of the historic filesystem types. */ fstypenum = (u_long)SCARG(uap, type); if (fstypenum < maxvfsconf) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == fstypenum) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); } else #endif /* COMPAT_43 */ if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | MNT_NOSYMFOLLOW | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOSYMFOLLOW | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
*/ error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { if (mp->mnt_syncer == NULL) error = vfs_allocate_syncvnode(mp); } else { if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); mp->mnt_syncer = NULL; } vfs_unbusy(mp, p); return (error); } /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); if ((mp->mnt_flag & MNT_RDONLY) == 0) error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. */ static void checkdirs(olddp) struct vnode *olddp; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; if (olddp->v_usecount == 1) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; } if (fdp->fd_rdir == olddp) { vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; } } if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } vput(newdp); } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(p, uap) struct proc *p; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), p)); } /* * Do the actual file system unmount. 
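As the comment above notes, unmount() now takes the path of the covered mount point rather than the special file. A minimal user-space sketch, assuming a placeholder mount point and sufficient privilege (root, or vfs.usermount set to 1).

        #include <sys/param.h>
        #include <sys/mount.h>
        #include <stdio.h>

        int
        main(void)
        {
                /* Placeholder mount point; normally requires superuser privilege. */
                if (unmount("/mnt", 0) == -1)
                        perror("unmount");
                return (0);
        }
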
*/ int dounmount(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { struct vnode *coveredvp; int error; simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ if (mp->mnt_syncer != NULL) vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); return (error); } CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); } mp->mnt_vfc->vfc_refcount--; if (mp->mnt_vnodelist.lh_first != NULL) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(p, uap) struct proc *p; struct sync_args *uap; { register struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, ((p != NULL) ? p->p_ucred : NOCRED), p); mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(p, uap) struct proc *p; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { register struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), p)); } /* * Get filesystem statistics. 
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(p, uap) struct proc *p; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(p, uap) struct proc *p; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(p, uap) struct proc *p; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. */ if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else p->p_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. 
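getfsstat() above copies out at most bufsize bytes of statfs structures and returns the number of mounted filesystems, so the usual idiom is to call it twice: once to size the buffer and once to fill it. A minimal user-space sketch of that idiom.

        #include <sys/param.h>
        #include <sys/ucred.h>
        #include <sys/mount.h>
        #include <stdio.h>
        #include <stdlib.h>

        int
        main(void)
        {
                struct statfs *fs;
                int i, n;

                /* First call only counts mounted filesystems, second call fills the buffer. */
                n = getfsstat(NULL, 0, MNT_NOWAIT);
                if (n <= 0)
                        return (1);
                fs = malloc(n * sizeof(*fs));
                if (fs == NULL)
                        return (1);
                n = getfsstat(fs, n * sizeof(*fs), MNT_NOWAIT);
                for (i = 0; i < n; i++)
                        printf("%s on %s (%s)\n", fs[i].f_mntfromname,
                            fs[i].f_mntonname, fs[i].f_fstypename);
                free(fs);
                return (0);
        }
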
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(p, uap) struct proc *p; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, p); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(p, uap) struct proc *p; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(p, uap) struct proc *p; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_rdir); fdp->fd_rdir = nd.ni_vp; return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, p) register struct nameidata *ndp; struct proc *p; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) vput(vp); else VOP_UNLOCK(vp, 0, p); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
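chroot() above only replaces fd_rdir; it does not change the working directory, which is why a chroot is conventionally followed by chdir("/") so the process keeps no reference outside the new root. A minimal user-space sketch; /var/empty is a placeholder path and the call requires superuser privilege.

        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                /* Placeholder directory; both calls must succeed for the confinement to hold. */
                if (chroot("/var/empty") == -1 || chdir("/") == -1) {
                        perror("chroot");
                        return (1);
                }
                return (0);
        }
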
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(p, uap) struct proc *p; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register struct vnode *vp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(p, &nfp, &indx); if (error) return (error); fp = nfp; cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } if (error == ERESTART) error = EINTR; fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; vp = nd.ni_vp; fp->f_flag = flags & FMASK; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (vp->v_object == NULL)) vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(p, uap) struct proc *p; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(p, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
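When O_EXLOCK or O_SHLOCK is passed, open() above takes an flock()-style advisory lock on the vnode before returning the descriptor, and with FNONBLOCK set a contended lock makes the open fail rather than sleep. A minimal user-space sketch, assuming the BSD O_EXLOCK extension from fcntl.h; the lock-file path is a placeholder.

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                /* Placeholder lock file; O_NONBLOCK makes a contended lock fail with EAGAIN. */
                fd = open("/tmp/example.lock",
                    O_RDWR | O_CREAT | O_EXLOCK | O_NONBLOCK, 0644);
                if (fd == -1) {
                        perror("open");
                        return (1);
                }
                /* ... exclusive section ... */
                close(fd);
                return (0);
        }
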
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(p, uap) struct proc *p; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; int whiteout; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) { error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); if (error) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); } else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp) vrele(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(p, uap) struct proc *p; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); + return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(p, uap) struct proc *p; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { register struct vnode *vp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); } } vrele(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
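mkfifo() above is essentially mknod() restricted to VFIFO, and unlike mknod() it requires no special privilege. A minimal user-space sketch; the path is a placeholder.

        #include <sys/stat.h>
        #include <errno.h>
        #include <stdio.h>

        int
        main(void)
        {
                /* Placeholder path; the mode is still filtered by the process umask. */
                if (mkfifo("/tmp/example_fifo", 0666) == -1 && errno != EEXIST) {
                        perror("mkfifo");
                        return (1);
                }
                return (0);
        }
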
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(p, uap) struct proc *p; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vattr vattr; char *path; int error; struct nameidata nd; path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); + vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(p, uap) struct proc *p; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(p, uap) struct proc *p; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); } + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. 
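The symlink() handler above copies the target string into a namei buffer and hands it to VOP_SYMLINK; reading it back is done with readlink(), which does not NUL-terminate its result. A minimal user-space sketch; both paths are placeholders.

        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                char buf[1024];
                ssize_t n;

                /* Placeholder link target and name. */
                if (symlink("/etc/motd", "/tmp/example_link") == -1)
                        perror("symlink");
                /* readlink(2) returns the length but does not NUL-terminate. */
                n = readlink("/tmp/example_link", buf, sizeof(buf) - 1);
                if (n != -1) {
                        buf[n] = '\0';
                        printf("-> %s\n", buf);
                }
                (void)unlink("/tmp/example_link");
                return (0);
        }
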
*/ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(p, uap) struct proc *p; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vattr vattr; int error; if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); switch (SCARG(uap, whence)) { case L_INCR: fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); } *(off_t *)(p->p_retval) = fp->f_offset; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(p, uap) struct proc *p; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(p, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(p, uap) struct proc *p; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; int error, flags, t_gid, t_uid; struct nameidata nd; t_uid = cred->cr_uid; t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ if (SCARG(uap, flags)) { flags = 0; if (SCARG(uap, flags) & R_OK) flags |= VREAD; if (SCARG(uap, flags) & W_OK) flags |= VWRITE; if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); } vput(vp); out1: cred->cr_uid = t_uid; cred->cr_groups[0] = t_gid; return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(p, uap) struct proc *p; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. 
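access() above temporarily substitutes the real uid and gid into the credential before calling VOP_ACCESS, so it answers for the real, not the effective, identity. A minimal user-space sketch.

        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                /* Checks are made with the real user and group IDs of the caller. */
                if (access("/etc/motd", R_OK | W_OK) == 0)
                        printf("readable and writable\n");
                else
                        perror("access");
                return (0);
        }
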
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(p, uap) struct proc *p; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(p, uap) struct proc *p; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(p, uap) struct proc *p; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(p, uap) struct proc *p; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. 
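 *
 * The link contents are copied out without a terminating NUL; the number
 * of bytes actually placed in the buffer is returned as
 * count - uio_resid.  A hypothetical userland caller (illustration only)
 * therefore terminates the string itself:
 *
 *	char target[MAXPATHLEN];
 *	int n;
 *
 *	n = readlink("/some/link", target, sizeof(target) - 1);
 *	if (n == -1)
 *		err(1, "readlink");
 *	target[n] = '\0';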
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(p, uap) struct proc *p; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(p, uap) struct proc *p; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(p, uap) struct proc *p; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(p, uap) struct proc *p; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change mode of a file given a file descriptor. 
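 *
 * As in chmod() above, the new mode is masked with ALLPERMS and handed to
 * VOP_SETATTR(); whether the caller may actually change it is decided by
 * the filesystem, not here.  Illustrative userland use (not part of this
 * change):
 *
 *	if (fchmod(fd, 0640) == -1)
 *		err(1, "fchmod");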
*/ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(p, uap) struct proc *p; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(p, uap) struct proc *p; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(p, uap) struct proc *p; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(p, uap) struct proc *p; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set the access and modification times of a file. 
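 *
 * A NULL tptr means "set both times to now": the current time is taken
 * with microtime() and VA_UTIMES_NULL is set so the filesystem may permit
 * the update given only write access.  The times are supplied in
 * microseconds and converted here to the nanosecond resolution of struct
 * vattr (tv_usec * 1000).  A minimal userland sketch (illustration only):
 *
 *	struct timeval tv[2];
 *
 *	tv[0].tv_sec = tv[1].tv_sec = time(NULL);
 *	tv[0].tv_usec = tv[1].tv_usec = 0;
 *	if (utimes("somefile", tv) == -1)
 *		err(1, "utimes");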
*/ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(p, uap) struct proc *p; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { register struct vnode *vp; struct timeval tv[2]; struct vattr vattr; int error; struct nameidata nd; VATTR_NULL(&vattr); if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, sizeof (tv))) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(p, uap) struct proc *p; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(p, uap) struct proc *p; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(p, uap) struct proc *p; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap)); } /* * Truncate a file given a file descriptor. 
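 *
 * Like otruncate() above, this only widens the old long length into the
 * off_t interface and calls the new syscall; the actual checks (negative
 * length, FWRITE on the descriptor, vn_writechk()) are done there.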
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(p, uap) struct proc *p; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(p, uap) struct proc *p; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { register struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) == NULL) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, FALSE); } if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) { error = VOP_FSYNC(vp, fp->f_cred, MNT_LAZY, p); } else { error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) ? MNT_NOWAIT : MNT_WAIT, p); } VOP_UNLOCK(vp, 0, p); if ((vp->v_mount->mnt_flag & MNT_SOFTDEP) && bioops.io_sync) (*bioops.io_sync)(NULL); } return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(p, uap) struct proc *p; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), p); if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. 
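 *
 * That case is flagged with the internal value error = -1: the cleanup
 * code below still treats it as a failure for the purpose of aborting the
 * two lookups and releasing the vnodes, but it is translated back to
 * plain success just before returning, so e.g. rename("foo", "foo")
 * returns 0 without the directory being modified.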
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(p, uap) struct proc *p; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(p, uap) struct proc *p; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); } + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. 
*/ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(p, uap) struct proc *p; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
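 *
 * The vnode is locked across VOP_READDIR(), the new file offset is taken
 * from the uio afterwards, and *basep receives the offset that was in
 * effect before the read.  A userland consumer (illustrative sketch, not
 * part of this change) typically loops until a read returns 0:
 *
 *	char buf[4096];
 *	long base;
 *	int n;
 *	struct dirent *dp;
 *
 *	while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0)
 *		for (dp = (struct dirent *)buf; (char *)dp < buf + n;
 *		    dp = (struct dirent *)((char *)dp + dp->d_reclen))
 *			printf("%s\n", dp->d_name);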
*/ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(p, uap) struct proc *p; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(p, uap) struct proc *p; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; fdp = p->p_fd; p->p_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(p, uap) struct proc *p; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. 
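 *
 * Returns EBADF if the descriptor is out of range or unused, and EINVAL
 * if it does not refer to a vnode or fifo; on success *fpp points at the
 * file entry, with no extra reference taken.
 *
 * The __getcwd() syscall below it reconstructs the current directory path
 * purely from the name cache: starting at fd_cdir it walks parent links
 * (v_dd), copying each cached name backwards into the tail of a temporary
 * buffer and prepending '/', and it fails (ENOENT, ENOTDIR, EBADF or
 * ENOMEM) as soon as a component is not cached or the buffer fills, in
 * which case the caller has to fall back to some other method.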
*/ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { struct file *fp; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) return (EINVAL); *fpp = fp; return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif #define STATNODE(mode, name, var) \ SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); int __getcwd(p, uap) struct proc *p; struct __getcwd_args *uap; { char *bp, *buf; int error, i, slash_prefixed; struct filedesc *fdp; struct namecache *ncp; struct vnode *vp; numcwdcalls++; if (disablecwd) return (ENODEV); if (uap->buflen < 2) return (EINVAL); if (uap->buflen > MAXPATHLEN) uap->buflen = MAXPATHLEN; buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); bp += uap->buflen - 1; *bp = '\0'; fdp = p->p_fd; slash_prefixed = 0; for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd->v_id != vp->v_ddid) { numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = vp->v_dd; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } numcwdfound++; error = copyout(bp, uap->buf, strlen(bp) + 1); free(buf, M_TEMP); return (error); } Index: head/sys/kern/vfs_vnops.c =================================================================== --- head/sys/kern/vfs_vnops.c (revision 35822) +++ head/sys/kern/vfs_vnops.c (revision 35823) @@ -1,546 +1,548 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 - * $Id: vfs_vnops.c,v 1.55 1998/04/08 18:31:58 wosch Exp $ + * $Id: vfs_vnops.c,v 1.56 1998/04/10 00:09:04 alex Exp $ */ #include #include #include #include #include #include #include #include #include #include #include static int vn_closefile __P((struct file *fp, struct proc *p)); static int vn_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p)); static int vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int vn_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); static int vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); struct fileops vnops = { vn_read, vn_write, vn_ioctl, vn_poll, vn_closefile }; /* * Common code for vnode open operations. * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. */ int vn_open(ndp, fmode, cmode) register struct nameidata *ndp; int fmode, cmode; { register struct vnode *vp; register struct proc *p = ndp->ni_cnd.cn_proc; register struct ucred *cred = p->p_ucred; struct vattr vat; struct vattr *vap = &vat; int error; if (fmode & O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; error = namei(ndp); if (error) return (error); if (ndp->ni_vp == NULL) { VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; if (fmode & O_EXCL) vap->va_vaflags |= VA_EXCLUSIVE; VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); - if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, - &ndp->ni_cnd, vap)) + error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, + &ndp->ni_cnd, vap); + vput(ndp->ni_dvp); + if (error) return (error); ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create"); ASSERT_VOP_LOCKED(ndp->ni_vp, "create"); fmode &= ~O_TRUNC; vp = ndp->ni_vp; } else { VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); ndp->ni_dvp = NULL; vp = ndp->ni_vp; if (fmode & O_EXCL) { error = EEXIST; goto bad; } fmode &= ~O_CREAT; } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags = ((fmode & O_NOFOLLOW) ? 
NOFOLLOW : FOLLOW) | LOCKLEAF; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; } if (vp->v_type == VLNK) { error = EMLINK; goto bad; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } if ((fmode & O_CREAT) == 0) { if (fmode & FREAD) { error = VOP_ACCESS(vp, VREAD, cred, p); if (error) goto bad; } if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; error = VOP_ACCESS(vp, VWRITE, cred, p); if (error) goto bad; } } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, p); /* XXX */ VOP_LEASE(vp, p, cred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, cred, p); if (error) goto bad; } error = VOP_OPEN(vp, fmode, cred, p); if (error) goto bad; /* * Make sure that a VM object is created for VMIO support. */ if (vp->v_type == VREG) { if ((error = vfs_object_create(vp, p, cred, 1)) != 0) goto bad; } if (fmode & FWRITE) vp->v_writecount++; return (0); bad: vput(vp); return (error); } /* * Check for write permissions on the specified vnode. * Prototype text segments cannot be written. */ int vn_writechk(vp) register struct vnode *vp; { /* * If there's shared text associated with * the vnode, try to free it up once. If * we fail, we can't allow writing. */ if (vp->v_flag & VTEXT) return (ETXTBSY); return (0); } /* * Vnode close call */ int vn_close(vp, flags, cred, p) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; { int error; if (flags & FWRITE) vp->v_writecount--; error = VOP_CLOSE(vp, flags, cred, p); vrele(vp); return (error); } /* * Package up an I/O request on a vnode into a uio and do it. */ int vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) enum uio_rw rw; struct vnode *vp; caddr_t base; int len; off_t offset; enum uio_seg segflg; int ioflg; struct ucred *cred; int *aresid; struct proc *p; { struct uio auio; struct iovec aiov; int error; if ((ioflg & IO_NODELOCKED) == 0) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; aiov.iov_len = len; auio.uio_resid = len; auio.uio_offset = offset; auio.uio_segflg = segflg; auio.uio_rw = rw; auio.uio_procp = p; if (rw == UIO_READ) { error = VOP_READ(vp, &auio, ioflg, cred); } else { error = VOP_WRITE(vp, &auio, ioflg, cred); } if (aresid) *aresid = auio.uio_resid; else if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) VOP_UNLOCK(vp, 0, p); return (error); } /* * File table vnode read routine. */ static int vn_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct vnode *vp = (struct vnode *)fp->f_data; struct proc *p = uio->uio_procp; int count, error; int flag; VOP_LEASE(vp, p, cred, LEASE_READ); vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p); if (uio->uio_offset == -1) uio->uio_offset = fp->f_offset; count = uio->uio_resid; flag = 0; if (fp->f_flag & FNONBLOCK) flag |= IO_NDELAY; /* * Sequential read heuristic. * If we have been doing sequential input, * a rewind operation doesn't turn off * sequential input mode. */ if (((fp->f_offset == 0) && (fp->f_seqcount > 0)) || (fp->f_offset == fp->f_nextread)) { int tmpseq = fp->f_seqcount; /* * XXX we assume that the filesystem block size is * the default. Not true, but still gives us a pretty * good indicator of how sequential the read operations * are. 
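 *
 * The sequential count is advanced by the number of BKVASIZE-sized chunks
 * in the current request (so, as a rough example, a read of four times
 * BKVASIZE bumps it by 4), clamped at 127, and passed down to the
 * filesystem in the upper bits of the ioflag (f_seqcount << 16), where it
 * can be used to scale read-ahead.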
*/ tmpseq += ((count + BKVASIZE - 1) / BKVASIZE); if (tmpseq >= 127) tmpseq = 127; fp->f_seqcount = tmpseq; flag |= (fp->f_seqcount << 16); } else { if (fp->f_seqcount > 1) fp->f_seqcount = 1; else fp->f_seqcount = 0; } error = VOP_READ(vp, uio, flag, cred); fp->f_offset += count - uio->uio_resid; fp->f_nextread = fp->f_offset; VOP_UNLOCK(vp, 0, p); return (error); } /* * File table vnode write routine. */ static int vn_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct vnode *vp = (struct vnode *)fp->f_data; struct proc *p = uio->uio_procp; int count, error, ioflag = IO_UNIT; if (uio->uio_offset == -1 && vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; if ((fp->f_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; VOP_LEASE(vp, p, cred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_WRITE(vp, uio, ioflag, cred); if (ioflag & IO_APPEND) fp->f_offset = uio->uio_offset; else fp->f_offset += count - uio->uio_resid; VOP_UNLOCK(vp, 0, p); return (error); } /* * File table vnode stat routine. */ int vn_stat(vp, sb, p) struct vnode *vp; register struct stat *sb; struct proc *p; { struct vattr vattr; register struct vattr *vap; int error; u_short mode; vap = &vattr; error = VOP_GETATTR(vp, vap, p->p_ucred, p); if (error) return (error); /* * Copy from vattr table */ sb->st_dev = vap->va_fsid; sb->st_ino = vap->va_fileid; mode = vap->va_mode; switch (vp->v_type) { case VREG: mode |= S_IFREG; break; case VDIR: mode |= S_IFDIR; break; case VBLK: mode |= S_IFBLK; break; case VCHR: mode |= S_IFCHR; break; case VLNK: mode |= S_IFLNK; /* This is a cosmetic change, symlinks do not have a mode. */ if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) sb->st_mode &= ~ACCESSPERMS; /* 0000 */ else sb->st_mode |= ACCESSPERMS; /* 0777 */ break; case VSOCK: mode |= S_IFSOCK; break; case VFIFO: mode |= S_IFIFO; break; default: return (EBADF); }; sb->st_mode = mode; sb->st_nlink = vap->va_nlink; sb->st_uid = vap->va_uid; sb->st_gid = vap->va_gid; sb->st_rdev = vap->va_rdev; sb->st_size = vap->va_size; sb->st_atimespec = vap->va_atime; sb->st_mtimespec = vap->va_mtime; sb->st_ctimespec = vap->va_ctime; sb->st_blksize = vap->va_blocksize; sb->st_flags = vap->va_flags; if (p->p_ucred->cr_uid != 0) sb->st_gen = 0; else sb->st_gen = vap->va_gen; #if (S_BLKSIZE == 512) /* Optimize this case */ sb->st_blocks = vap->va_bytes >> 9; #else sb->st_blocks = vap->va_bytes / S_BLKSIZE; #endif return (0); } /* * File table vnode ioctl routine. */ static int vn_ioctl(fp, com, data, p) struct file *fp; int com; caddr_t data; struct proc *p; { register struct vnode *vp = ((struct vnode *)fp->f_data); struct vattr vattr; int error; switch (vp->v_type) { case VREG: case VDIR: if (com == FIONREAD) { error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); if (error) return (error); *(int *)data = vattr.va_size - fp->f_offset; return (0); } if (com == FIONBIO || com == FIOASYNC) /* XXX */ return (0); /* XXX */ /* fall into ... 
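 * the default case below: character, block and fifo vnodes hand the
 * request straight to the driver via VOP_IOCTL(), and on a successful
 * TIOCSCTTY the vnode is additionally recorded (with an extra reference)
 * as the session's controlling terminal, replacing any previous one.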
*/ default: #if 0 return (ENOTTY); #endif case VFIFO: case VCHR: case VBLK: error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); if (error == 0 && com == TIOCSCTTY) { /* Do nothing if reassigning same control tty */ if (p->p_session->s_ttyvp == vp) return (0); /* Get rid of reference to old control tty */ if (p->p_session->s_ttyvp) vrele(p->p_session->s_ttyvp); p->p_session->s_ttyvp = vp; VREF(vp); } return (error); } } /* * File table vnode poll routine. */ static int vn_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p)); } /* * Check that the vnode is still valid, and if so * acquire requested lock. */ int vn_lock(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int error; do { if ((flags & LK_INTERLOCK) == 0) simple_lock(&vp->v_interlock); if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vn_lock", 0); error = ENOENT; } else { error = VOP_LOCK(vp, flags | LK_NOPAUSE | LK_INTERLOCK, p); if (error == 0) return (error); } flags &= ~LK_INTERLOCK; } while (flags & LK_RETRY); return (error); } /* * File table vnode close routine. */ static int vn_closefile(fp, p) struct file *fp; struct proc *p; { return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, fp->f_cred, p)); } Index: head/sys/kern/vnode_if.src =================================================================== --- head/sys/kern/vnode_if.src (revision 35822) +++ head/sys/kern/vnode_if.src (revision 35823) @@ -1,473 +1,473 @@ # # Copyright (c) 1992, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. All advertising materials mentioning features or use of this software # must display the following acknowledgement: # This product includes software developed by the University of # California, Berkeley and its contributors. # 4. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. 
# # @(#)vnode_if.src 8.12 (Berkeley) 5/14/95 -# $Id: vnode_if.src,v 1.15 1997/10/16 20:32:23 phk Exp $ +# $Id: vnode_if.src,v 1.16 1998/03/08 09:57:26 julian Exp $ # # # Above each of the vop descriptors is a specification of the locking # protocol used by each vop call. The first column is the name of # the variable, the remaining three columns are in, out and error # respectively. The "in" column defines the lock state on input, # the "out" column defines the state on succesful return, and the # "error" column defines the locking state on error exit. # # The locking value can take the following values: # L: locked. # U: unlocked/ # -: not applicable. vnode does not yet (or no longer) exists. # =: the same on input and output, may be either L or U. # X: locked if not nil. # # #% lookup dvp L ? ? #% lookup vpp - L - # # XXX - the lookup locking protocol defies simple description and depends # on the flags and operation fields in the (cnp) structure. Note # especially that *vpp may equal dvp and both may be locked. # vop_lookup { IN struct vnode *dvp; INOUT struct vnode **vpp; IN struct componentname *cnp; }; # #% cachedlookup dvp L ? ? #% cachedlookup vpp - L - # # This must be an exact copy of lookup. See kern/vfs_cache.c for details. # vop_cachedlookup { IN struct vnode *dvp; INOUT struct vnode **vpp; IN struct componentname *cnp; }; # -#% create dvp L U U +#% create dvp L L L #% create vpp - L - # vop_create { - IN WILLRELE struct vnode *dvp; + IN struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% whiteout dvp L L L #% whiteout cnp - - - #% whiteout flag - - - # vop_whiteout { - IN WILLRELE struct vnode *dvp; + IN struct vnode *dvp; IN struct componentname *cnp; IN int flags; }; # -#% mknod dvp L U U +#% mknod dvp L L L #% mknod vpp - X - # vop_mknod { - IN WILLRELE struct vnode *dvp; + IN struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% open vp L L L # vop_open { IN struct vnode *vp; IN int mode; IN struct ucred *cred; IN struct proc *p; }; # #% close vp U U U # vop_close { IN struct vnode *vp; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; # #% access vp L L L # vop_access { IN struct vnode *vp; IN int mode; IN struct ucred *cred; IN struct proc *p; }; # #% getattr vp = = = # vop_getattr { IN struct vnode *vp; IN struct vattr *vap; IN struct ucred *cred; IN struct proc *p; }; # #% setattr vp L L L # vop_setattr { IN struct vnode *vp; IN struct vattr *vap; IN struct ucred *cred; IN struct proc *p; }; # #% read vp L L L # vop_read { IN struct vnode *vp; INOUT struct uio *uio; IN int ioflag; IN struct ucred *cred; }; # #% write vp L L L # vop_write { IN struct vnode *vp; INOUT struct uio *uio; IN int ioflag; IN struct ucred *cred; }; # #% lease vp = = = # vop_lease { IN struct vnode *vp; IN struct proc *p; IN struct ucred *cred; IN int flag; }; # #% ioctl vp U U U # vop_ioctl { IN struct vnode *vp; IN u_long command; IN caddr_t data; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; # #% poll vp U U U # vop_poll { IN struct vnode *vp; IN int events; IN struct ucred *cred; IN struct proc *p; }; # #% revoke vp U U U # vop_revoke { IN struct vnode *vp; IN int flags; }; # # XXX - not used # vop_mmap { IN struct vnode *vp; IN int fflags; IN struct ucred *cred; IN struct proc *p; }; # #% fsync vp L L L # vop_fsync { IN struct vnode *vp; IN struct ucred *cred; IN int waitfor; IN struct proc *p; }; # -#% remove dvp L U U -#% remove vp L U U +#% remove dvp L L L 
+#% remove vp L L L # vop_remove { - IN WILLRELE struct vnode *dvp; - IN WILLRELE struct vnode *vp; + IN struct vnode *dvp; + IN struct vnode *vp; IN struct componentname *cnp; }; # +#% link tdvp L L L #% link vp U U U -#% link tdvp L U U # vop_link { - IN WILLRELE struct vnode *tdvp; + IN struct vnode *tdvp; IN struct vnode *vp; IN struct componentname *cnp; }; # #% rename fdvp U U U #% rename fvp U U U #% rename tdvp L U U #% rename tvp X U U # vop_rename { IN WILLRELE struct vnode *fdvp; IN WILLRELE struct vnode *fvp; IN struct componentname *fcnp; IN WILLRELE struct vnode *tdvp; IN WILLRELE struct vnode *tvp; IN struct componentname *tcnp; }; # -#% mkdir dvp L U U +#% mkdir dvp L L L #% mkdir vpp - L - # vop_mkdir { - IN WILLRELE struct vnode *dvp; + IN struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # -#% rmdir dvp L U U -#% rmdir vp L U U +#% rmdir dvp L L L +#% rmdir vp L L L # vop_rmdir { - IN WILLRELE struct vnode *dvp; - IN WILLRELE struct vnode *vp; + IN struct vnode *dvp; + IN struct vnode *vp; IN struct componentname *cnp; }; # -#% symlink dvp L U U +#% symlink dvp L L L #% symlink vpp - U - # # XXX - note that the return vnode has already been VRELE'ed # by the filesystem layer. To use it you must use vget, # possibly with a further namei. # vop_symlink { - IN WILLRELE struct vnode *dvp; + IN struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; IN char *target; }; # #% readdir vp L L L # vop_readdir { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; INOUT int *eofflag; OUT int *ncookies; INOUT u_long **cookies; }; # #% readlink vp L L L # vop_readlink { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; }; # #% abortop dvp = = = # vop_abortop { IN struct vnode *dvp; IN struct componentname *cnp; }; # #% inactive vp L U U # vop_inactive { IN struct vnode *vp; IN struct proc *p; }; # #% reclaim vp U U U # vop_reclaim { IN struct vnode *vp; IN struct proc *p; }; # #% lock vp U L U # vop_lock { IN struct vnode *vp; IN int flags; IN struct proc *p; }; # #% unlock vp L U L # vop_unlock { IN struct vnode *vp; IN int flags; IN struct proc *p; }; # #% bmap vp L L L #% bmap vpp - U - # vop_bmap { IN struct vnode *vp; IN daddr_t bn; OUT struct vnode **vpp; IN daddr_t *bnp; OUT int *runp; OUT int *runb; }; # # Needs work: no vp? # #vop_strategy { # IN struct buf *bp; #}; # #% print vp = = = # vop_print { IN struct vnode *vp; }; # #% islocked vp = = = # vop_islocked { IN struct vnode *vp; }; # #% pathconf vp L L L # vop_pathconf { IN struct vnode *vp; IN int name; OUT register_t *retval; }; # #% advlock vp U U U # vop_advlock { IN struct vnode *vp; IN caddr_t id; IN int op; IN struct flock *fl; IN int flags; }; # #% balloc vp L L L # vop_balloc { IN struct vnode *vp; IN off_t startoffset; IN int size; IN struct ucred *cred; IN int flags; OUT struct buf **bpp; }; # #% reallocblks vp L L L # vop_reallocblks { IN struct vnode *vp; IN struct cluster_save *buflist; }; vop_getpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; IN int reqpage; IN vm_ooffset_t offset; }; vop_putpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; IN int sync; IN int *rtvals; IN vm_ooffset_t offset; }; # # Needs work: no vp? 
# #vop_bwrite { # IN struct buf *bp; #}; Index: head/sys/miscfs/devfs/devfs_vnops.c =================================================================== --- head/sys/miscfs/devfs/devfs_vnops.c (revision 35822) +++ head/sys/miscfs/devfs/devfs_vnops.c (revision 35823) @@ -1,1581 +1,1566 @@ /* * Copyright 1997,1998 Julian Elischer. All rights reserved. * julian@freebsd.org * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: devfs_vnops.c,v 1.53 1998/03/26 20:52:12 phk Exp $ + * $Id: devfs_vnops.c,v 1.54 1998/04/19 23:32:17 julian Exp $ */ #include #include #include #include #include #include #include #include #include #include /* definitions of spec functions we use */ #include #include /* * Insert description here */ /* * Convert a component of a pathname into a pointer to a locked node. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and DNUNLOCK * instead of two DNUNLOCKs. 
* * Overall outline of devfs_lookup: * * check accessibility of directory * null terminate the component (lookup leaves the whole string alone) * look for name in cache, if found, then if at end of path * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * node and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache * On return to lookup, remove the null termination we put in at the start. * * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent node unlocked. */ static int devfs_lookup(struct vop_lookup_args *ap) /*struct vop_lookup_args { struct vnode * a_dvp; directory vnode ptr struct vnode ** a_vpp; where to put the result struct componentname * a_cnp; the name we want };*/ { struct componentname *cnp = ap->a_cnp; struct vnode *dir_vnode = ap->a_dvp; struct vnode **result_vnode = ap->a_vpp; dn_p dir_node; /* the directory we are searching */ dn_p new_node; /* the node we are searching for */ devnm_p new_nodename; int flags = cnp->cn_flags; int op = cnp->cn_nameiop; /* LOOKUP, CREATE, RENAME, or DELETE */ int lockparent = flags & LOCKPARENT; int wantparent = flags & (LOCKPARENT|WANTPARENT); int error = 0; struct proc *p = cnp->cn_proc; char heldchar; /* the char at the end of the name componet */ *result_vnode = NULL; /* safe not sorry */ /*XXX*/ DBPRINT(("lookup\n")); if (dir_vnode->v_usecount == 0) printf("dir had no refs "); if (devfs_vntodn(dir_vnode,&dir_node)) { printf("vnode has changed?\n"); vprint("=",dir_vnode); return(EINVAL); } /* * Check accessiblity of directory. */ if (dir_node->type != DEV_DIR) /* XXX or symlink? */ { return (ENOTDIR); } if (error = VOP_ACCESS(dir_vnode, VEXEC, cnp->cn_cred, p)) { return (error); } /* * We now have a segment name to search for, and a directory to search. * */ /***********************************************************************\ * SEARCH FOR NAME * * while making sure the component is null terminated for the strcmp * \***********************************************************************/ heldchar = cnp->cn_nameptr[cnp->cn_namelen]; cnp->cn_nameptr[cnp->cn_namelen] = '\0'; new_nodename = dev_findname(dir_node,cnp->cn_nameptr); cnp->cn_nameptr[cnp->cn_namelen] = heldchar; if(!new_nodename) { /*******************************************************\ * Failed to find it.. (That may be good) * \*******************************************************/ new_node = NULL; /* to be safe */ /* * If creating, and at end of pathname * then can consider * allowing file to be created. */ if (!(flags & ISLASTCN) || !(op == CREATE || op == RENAME)) { return ENOENT; } /* * Access for write is interpreted as allowing * creation of files in the directory. */ if (error = VOP_ACCESS(dir_vnode, VWRITE, cnp->cn_cred, p)) { DBPRINT(("MKACCESS ")); return (error); } /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to add a new entry. * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory vnode in namei_data->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. 
* * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; /*XXX why? */ if (!lockparent) VOP_UNLOCK(dir_vnode, 0, p); return (EJUSTRETURN); } /***************************************************************\ * Found it.. this is not always a good thing.. * \***************************************************************/ new_node = new_nodename->dnp; new_node->last_lookup = new_nodename; /* for unlink */ /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in namei_data->ni_dvp), otherwise we go * on and lock the node, being careful with ".". */ if (op == DELETE && (flags & ISLASTCN)) { /* * Write access to directory required to delete files. */ if (error = VOP_ACCESS(dir_vnode, VWRITE, cnp->cn_cred, p)) return (error); /* * we are trying to delete '.'. What does this mean? XXX */ if (dir_node == new_node) { VREF(dir_vnode); *result_vnode = dir_vnode; return (0); } /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ devfs_dntovn(new_node,result_vnode); #ifdef NOTYET if ((dir_node->mode & ISVTX) && cnp->cn_cred->cr_uid != 0 && cnp->cn_cred->cr_uid != dir_node->uid && cnp->cn_cred->cr_uid != new_node->uid) { VOP_UNLOCK(*result_vnode, 0, p); return (EPERM); } #endif if (!lockparent) VOP_UNLOCK(dir_vnode, 0, p); return (0); } /* * If rewriting (RENAME), return the vnode and the * information required to rewrite the present directory * Must get node of directory entry to verify it's a * regular file, or empty directory. */ if (op == RENAME && wantparent && (flags & ISLASTCN)) { /* * Are we allowed to change the holding directory? */ if (error = VOP_ACCESS(dir_vnode, VWRITE, cnp->cn_cred, p)) return (error); /* * Careful about locking second node. * This can only occur if the target is ".". */ if (dir_node == new_node) return (EISDIR); devfs_dntovn(new_node,result_vnode); /* hmm save the 'from' name (we need to delete it) */ cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dir_vnode, 0, p); return (0); } /* * Step through the translation in the name. We do not unlock the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "saved_dir_node" XXX. We must get the target * node before unlocking * the directory to insure that the node will not be removed * before we get it. We prevent deadlock by always fetching * nodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the lock for the * node associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. 
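 *
 * Concretely, the code below handles three cases: for ".." the directory
 * is unlocked before the target node is fetched and re-locked afterwards
 * only if LOCKPARENT is set and this is the last component; for "." a
 * new reference is simply taken on the directory itself; for any other
 * component the target is fetched and the directory is then unlocked
 * whenever LOCKPARENT is clear or this is the last component, which is
 * why the header above notes that LOOKUP|LOCKPARENT currently comes back
 * with the parent unlocked.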
*/ if (flags & ISDOTDOT) { VOP_UNLOCK(dir_vnode, 0, p); /* race to get the node */ devfs_dntovn(new_node,result_vnode); if (lockparent && (flags & ISLASTCN)) vn_lock(dir_vnode, LK_EXCLUSIVE | LK_RETRY, p); } else if (dir_node == new_node) { VREF(dir_vnode); /* we want ourself, ie "." */ *result_vnode = dir_vnode; } else { devfs_dntovn(new_node,result_vnode); if (!lockparent || (flags & ISLASTCN)) VOP_UNLOCK(dir_vnode, 0, p); } DBPRINT(("GOT\n")); return (0); } /* * Create a regular file. * We must also free the pathname buffer pointed at * by ndp->ni_pnbuf, always on error, or only if the * SAVESTART bit in ni_nameiop is clear on success. * * * Always error... no such thing in this FS */ #ifdef notyet static int devfs_create(struct vop_mknod_args *ap) /*struct vop_mknod_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ { DBPRINT(("create\n")); - vput(ap->a_dvp); return EINVAL; } static int devfs_mknod( struct vop_mknod_args *ap) /*struct vop_mknod_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ { int error; DBPRINT(("mknod\n")); switch (ap->a_vap->va_type) { case VDIR: #ifdef VNSLEAZE return devfs_mkdir(ap); /*XXX check for WILLRELE settings (different)*/ #else error = VOP_MKDIR(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap); #endif break; /* * devfs_create() sets ndp->ni_vp. */ case VREG: #ifdef VNSLEAZE return devfs_create(ap); /*XXX check for WILLRELE settings (different)*/ #else error = VOP_CREATE(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap); #endif break; default: return EINVAL; break; } return error; } #endif /* notyet */ static int devfs_access(struct vop_access_args *ap) /*struct vop_access_args { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ { /* * mode is filled with a combination of VREAD, VWRITE, * and/or VEXEC bits turned on. In an octal number these * are the Y in 0Y00. */ struct vnode *vp = ap->a_vp; int mode = ap->a_mode; struct ucred *cred = ap->a_cred; dn_p file_node; int error; gid_t *gp; int i; DBPRINT(("access\n")); if (error = devfs_vntodn(vp,&file_node)) { printf("devfs_vntodn returned %d ",error); return error; } /* * if we are not running as a process, we are in the * kernel and we DO have permission */ if (ap->a_p == NULL) return 0; /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != file_node->uid) { /* failing that.. try groups */ mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) { if (file_node->gid == *gp) { goto found; } } /* failing that.. try general access */ mode >>= 3; found: ; } if ((file_node->mode & mode) == mode) return (0); /* * Root gets to do anything. * but only use suser prives as a last resort * (Use of super powers is recorded in ap->a_p->p_acflag) */ if( suser(cred, &ap->a_p->p_acflag) == 0) /* XXX what if no proc? 
*/ return 0; return (EACCES); } static int devfs_getattr(struct vop_getattr_args *ap) /*struct vop_getattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; dn_p file_node; int error; DBPRINT(("getattr\n")); if (error = devfs_vntodn(vp,&file_node)) { printf("devfs_vntodn returned %d ",error); return error; } vap->va_rdev = 0;/* default value only */ vap->va_mode = file_node->mode; switch (file_node->type) { case DEV_DIR: vap->va_rdev = (dev_t)file_node->dvm; vap->va_mode |= (S_IFDIR); break; case DEV_CDEV: vap->va_rdev = file_node->by.Cdev.dev; vap->va_mode |= (S_IFCHR); break; case DEV_BDEV: vap->va_rdev = file_node->by.Bdev.dev; vap->va_mode |= (S_IFBLK); break; case DEV_SLNK: break; } vap->va_type = vp->v_type; vap->va_nlink = file_node->links; vap->va_uid = file_node->uid; vap->va_gid = file_node->gid; vap->va_fsid = (long)file_node->dvm; vap->va_fileid = (long)file_node; vap->va_size = file_node->len; /* now a u_quad_t */ vap->va_blocksize = 512; /* * XXX If the node times are in Jan 1, 1970, then * update them to the boot time. * When we made the node, the date/time was not yet known. */ if(file_node->ctime.tv_sec < (24 * 3600)) { TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->ctime)); TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->mtime)); TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->atime)); } vap->va_ctime = file_node->ctime; vap->va_mtime = file_node->mtime; vap->va_atime = file_node->atime; vap->va_gen = 0; vap->va_flags = 0; vap->va_bytes = file_node->len; /* u_quad_t */ vap->va_filerev = 0; /* XXX */ /* u_quad_t */ vap->va_vaflags = 0; /* XXX */ return 0; } static int devfs_setattr(struct vop_setattr_args *ap) /*struct vop_setattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; int error = 0; gid_t *gp; int i; dn_p file_node; if (error = devfs_vntodn(vp,&file_node)) { printf("devfs_vntodn returned %d ",error); return error; } DBPRINT(("setattr\n")); if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL )) { return EINVAL; } /* * Anyone can touch the files in such a way that the times are set * to NOW (e.g. run 'touch') if they have write permissions * however only the owner or root can set "un-natural times. * They also don't need write permissions. */ if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { #if 0 /* * This next test is pointless under devfs for now.. * as there is only one devfs hiding under potentially many * mountpoints and actual device node are really 'mounted' under * a FAKE mountpoint inside the kernel only, no matter where it * APPEARS they are mounted to the outside world.. * A readonly devfs doesn't exist anyway. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #endif if (((vap->va_vaflags & VA_UTIMES_NULL) == 0) && (cred->cr_uid != file_node->uid) && suser(cred, &p->p_acflag)) return (EPERM); if(VOP_ACCESS(vp, VWRITE, cred, p)) return (EACCES); file_node->atime = vap->va_atime; file_node->mtime = vap->va_mtime; nanotime(&file_node->ctime); return (0); } /* * Change the permissions.. must be root or owner to do this. 
*/ if (vap->va_mode != (u_short)VNOVAL) { if ((cred->cr_uid != file_node->uid) && suser(cred, &p->p_acflag)) return (EPERM); /* set drwxwxrwx stuff */ file_node->mode &= ~07777; file_node->mode |= vap->va_mode & 07777; } /* * Change the owner.. must be root to do this. */ if (vap->va_uid != (uid_t)VNOVAL) { if (suser(cred, &p->p_acflag)) return (EPERM); file_node->uid = vap->va_uid; } /* * Change the group.. must be root or owner to do this. * If we are the owner, we must be in the target group too. * don't use suser() unless you have to as it reports * whether you needed suser powers or not. */ if (vap->va_gid != (gid_t)VNOVAL) { if (cred->cr_uid == file_node->uid){ gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) { if (vap->va_gid == *gp) goto cando; } } /* * we can't do it with normal privs, * do we have an ace up our sleeve? */ if( suser(cred, &p->p_acflag)) return (EPERM); cando: file_node->gid = vap->va_gid; } #if 0 /* * Copied from somewhere else * but only kept as a marker and reminder of the fact that * flags should be handled some day */ if (vap->va_flags != VNOVAL) { if (error = suser(cred, &p->p_acflag)) return error; if (cred->cr_uid == 0) ; else { } } #endif return error; } static int devfs_read(struct vop_read_args *ap) /*struct vop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ { int error = 0; dn_p file_node; DBPRINT(("read\n")); if (error = devfs_vntodn(ap->a_vp,&file_node)) { printf("devfs_vntodn returned %d ",error); return error; } switch (ap->a_vp->v_type) { case VREG: return(EINVAL); case VDIR: return VOP_READDIR(ap->a_vp,ap->a_uio,ap->a_cred, NULL,NULL,NULL); case VCHR: case VBLK: error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap); getnanotime(&(file_node->atime)); return(error); default: panic("devfs_read(): bad file type"); break; } } /* * Write data to a file or directory. */ static int devfs_write(struct vop_write_args *ap) /*struct vop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ { dn_p file_node; int error; DBPRINT(("write\n")); if (error = devfs_vntodn(ap->a_vp,&file_node)) { printf("devfs_vntodn returned %d ",error); return error; } switch (ap->a_vp->v_type) { case VREG: return(EINVAL); case VDIR: return(EISDIR); case VCHR: case VBLK: error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap); getnanotime(&(file_node->mtime)); return(error); default: panic("devfs_write(): bad file type"); break; } } /* presently not called from devices anyhow */ #ifdef notyet static int devfs_ioctl(struct vop_ioctl_args *ap) /*struct vop_ioctl_args { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ { DBPRINT(("ioctl\n")); return ENOTTY; } static int devfs_select(struct vop_select_args *ap) /*struct vop_select_args { struct vnode *a_vp; int a_which; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ { DBPRINT(("select\n")); return 1; /* filesystems never block? */ } #endif /* notyet */ static int devfs_remove(struct vop_remove_args *ap) /*struct vop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; dn_p tp, tdp; devnm_p tnp; int doingdirectory = 0; int error = 0; uid_t ouruid = cnp->cn_cred->cr_uid; DBPRINT(("remove\n")); /* * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. 
Get pointers to all our * devfs structures. */ - if ( error = devfs_vntodn(dvp,&tdp)) { + if (error = devfs_vntodn(dvp, &tdp)) { abortit: VOP_ABORTOP(dvp, cnp); - if (dvp == vp) /* eh? */ - vrele(dvp); - else - vput(dvp); - if (vp) - vput(vp); return (error); } - if ( error = devfs_vntodn(vp,&tp)) goto abortit; + if (error = devfs_vntodn(vp, &tp)) goto abortit; /* * Assuming we are atomic, dev_lookup left this for us */ tnp = tp->last_lookup; /* * Check we are doing legal things WRT the new flags */ if ((tp->flags & (IMMUTABLE | APPEND)) || (tdp->flags & APPEND) /*XXX eh?*/ ) { error = EPERM; goto abortit; } /* * Make sure that we don't try do something stupid */ if ((tp->type) == DEV_DIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ( (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags&ISDOTDOT) ) { error = EINVAL; goto abortit; } doingdirectory++; } /*********************************** * Start actually doing things.... * ***********************************/ getnanotime(&(tdp->mtime)); /* * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. * XXX shoudn't this be in generic code? */ if ((tdp->mode & S_ISTXT) && ouruid != 0 && ouruid != tdp->uid && ouruid != tp->uid ) { error = EPERM; goto abortit; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (( doingdirectory) && (tp->links > 2)) { printf("nlink = %d\n",tp->links); /*XXX*/ error = ENOTEMPTY; goto abortit; } dev_free_name(tnp); tp = NULL; - vput(vp); - vput(dvp); return (error); } /* */ static int devfs_link(struct vop_link_args *ap) /*struct vop_link_args { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; dn_p fp, tdp; devnm_p tnp; int error = 0; DBPRINT(("link\n")); /* * First catch an arbitrary restriction for this FS */ if(cnp->cn_namelen > DEVMAXNAMESIZE) { error = ENAMETOOLONG; goto abortit; } /* * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. Get pointers to all our * devfs structures. */ if ( error = devfs_vntodn(tdvp,&tdp)) goto abortit; if ( error = devfs_vntodn(vp,&fp)) goto abortit; /* * trying to move it out of devfs? (v_tag == VT_DEVFS) */ if ( (vp->v_tag != VT_DEVFS) || (vp->v_tag != tdvp->v_tag) ) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, cnp); goto out; } /* * Check we are doing legal things WRT the new flags */ if (fp->flags & (IMMUTABLE | APPEND)) { error = EPERM; goto abortit; } /*********************************** * Start actually doing things.... * ***********************************/ getnanotime(&(tdp->atime)); error = dev_add_name(cnp->cn_nameptr, tdp, NULL, fp, &tnp); out: - vput(tdvp); return (error); } /* * Rename system call. Seems overly complicated to me... * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. * * When the target exists, both the directory * and target vnodes are locked. * the source and source-parent vnodes are referenced * * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. 
This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to node if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int devfs_rename(struct vop_rename_args *ap) /*struct vop_rename_args { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; dn_p fp, fdp, tp, tdp; devnm_p fnp,tnp; int doingdirectory = 0; int error = 0; /* * First catch an arbitrary restriction for this FS */ if(tcnp->cn_namelen > DEVMAXNAMESIZE) { error = ENAMETOOLONG; goto abortit; } /* * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. Get pointers to all our * devfs structures. */ if ( error = devfs_vntodn(tdvp,&tdp)) goto abortit; if ( error = devfs_vntodn(fdvp,&fdp)) goto abortit; if ( error = devfs_vntodn(fvp,&fp)) goto abortit; fnp = fp->last_lookup; if (tvp) { if ( error = devfs_vntodn(tvp,&tp)) goto abortit; tnp = tp->last_lookup; } else { tp = NULL; tnp = NULL; } /* * trying to move it out of devfs? (v_tag == VT_DEVFS) * if we move a dir across mnt points. we need to fix all * the mountpoint pointers! XXX * so for now keep dirs within the same mount */ if ( (fvp->v_tag != VT_DEVFS) || (fvp->v_tag != tdvp->v_tag) || (tvp && (fvp->v_tag != tvp->v_tag)) || ((fp->type == DEV_DIR) && (fp->dvm != tdp->dvm ))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) /* eh? */ vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } /* * Check we are doing legal things WRT the new flags */ if ((tp && (tp->flags & (IMMUTABLE | APPEND))) || (fp->flags & (IMMUTABLE | APPEND)) || (fdp->flags & APPEND)) { error = EPERM; goto abortit; } /* * Make sure that we don't try do something stupid */ if ((fp->type) == DEV_DIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || (fcnp->cn_flags&ISDOTDOT) || (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.') || (tcnp->cn_flags&ISDOTDOT) || (tdp == fp )) { error = EINVAL; goto abortit; } doingdirectory++; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && (tdp != fdp)) { dn_p tmp,ntmp; error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); tmp = tdp; do { if(tmp == fp) { /* XXX unlock stuff here probably */ error = EINVAL; goto out; } ntmp = tmp; } while ((tmp = tmp->by.Dir.parent) != ntmp); } /*********************************** * Start actually doing things.... * ***********************************/ getnanotime(&(fp->atime)); /* * Check if just deleting a link name. 
*/ if (fvp == tvp) { if (fvp->v_type == VDIR) { error = EINVAL; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* Delete source. */ VOP_ABORTOP(fdvp, fcnp); /*XXX*/ vrele(fdvp); vrele(fvp); dev_free_name(fnp); return 0; } /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, too bad :) */ fp->links++; /* * If the target exists zap it (unless it's a non-empty directory) * We could do that as well but won't */ if (tp) { int ouruid = tcnp->cn_cred->cr_uid; /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. * XXX shoudn't this be in generic code? */ if ((tdp->mode & S_ISTXT) && ouruid != 0 && ouruid != tdp->uid && ouruid != tp->uid ) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (( doingdirectory) && (tp->links > 2)) { printf("nlink = %d\n",tp->links); /*XXX*/ error = ENOTEMPTY; goto bad; } dev_free_name(tnp); tp = NULL; } dev_add_name(tcnp->cn_nameptr,tdp,fnp->as.front.realthing,fp,&tnp); fnp->dnp = NULL; fp->links--; /* one less link to it.. */ dev_free_name(fnp); fp->links--; /* we added one earlier*/ if (tdp) vput(tdvp); if (tp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (tp) vput(tvp); vput(tdvp); out: if (vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p) == 0) { fp->links--; /* we added one earlier*/ vput(fvp); } else vrele(fvp); return (error); } #ifdef notyet static int devfs_mkdir(struct vop_mkdir_args *ap) /*struct vop_mkdir_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ { DBPRINT(("mkdir\n")); vput(ap->a_dvp); return EINVAL; } static int devfs_rmdir(struct vop_rmdir_args *ap) /*struct vop_rmdir_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ { DBPRINT(("rmdir\n")); - vput(ap->a_dvp); - vput(ap->a_vp); - return 0; + return (0); } #endif static int devfs_symlink(struct vop_symlink_args *ap) /*struct vop_symlink_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ { - int err; + struct vnode *vp; + int error; dn_p dnp; union typeinfo by; devnm_p nm_p; - struct vnode *vp; DBPRINT(("symlink\n")); - if(err = devfs_vntodn(ap->a_dvp,&dnp)) { - vput(ap->a_dvp); - return err; + if(error = devfs_vntodn(ap->a_dvp, &dnp)) { + return (error); } by.Slnk.name = ap->a_target; by.Slnk.namelen = strlen(ap->a_target); - dev_add_entry( ap->a_cnp->cn_nameptr, dnp, DEV_SLNK, &by, + dev_add_entry(ap->a_cnp->cn_nameptr, dnp, DEV_SLNK, &by, NULL, NULL, &nm_p); - if(err = devfs_dntovn(nm_p->dnp,&vp) ) { - vput(ap->a_dvp); - return err; + if(error = devfs_dntovn(nm_p->dnp, &vp)) { + return (error); } VOP_SETATTR(vp, ap->a_vap, ap->a_cnp->cn_cred, ap->a_cnp->cn_proc); *ap->a_vpp = NULL; vput(vp); - vput(ap->a_dvp); return 0; } /* * Vnode op for readdir */ static int devfs_readdir(struct vop_readdir_args *ap) /*struct vop_readdir_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *eofflag; int *ncookies; u_int **cookies; } */ { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct dirent dirent; dn_p dir_node; devnm_p name_node; char *name; int error = 0; int reclen; int nodenumber; int 
startpos,pos; DBPRINT(("readdir\n")); /* set up refs to dir */ if (error = devfs_vntodn(vp,&dir_node)) return error; if(dir_node->type != DEV_DIR) return(ENOTDIR); pos = 0; startpos = uio->uio_offset; name_node = dir_node->by.Dir.dirlist; nodenumber = 0; getnanotime(&(dir_node->atime)); while ((name_node || (nodenumber < 2)) && (uio->uio_resid > 0)) { switch(nodenumber) { case 0: dirent.d_fileno = (unsigned long int)dir_node; name = "."; dirent.d_namlen = 1; dirent.d_type = DT_DIR; break; case 1: if(dir_node->by.Dir.parent) dirent.d_fileno = (unsigned long int)dir_node->by.Dir.parent; else dirent.d_fileno = (unsigned long int)dir_node; name = ".."; dirent.d_namlen = 2; dirent.d_type = DT_DIR; break; default: dirent.d_fileno = (unsigned long int)name_node->dnp; dirent.d_namlen = strlen(name_node->name); name = name_node->name; switch(name_node->dnp->type) { case DEV_BDEV: dirent.d_type = DT_BLK; break; case DEV_CDEV: dirent.d_type = DT_CHR; break; case DEV_DDEV: dirent.d_type = DT_SOCK; /*XXX*/ break; case DEV_DIR: dirent.d_type = DT_DIR; break; case DEV_SLNK: dirent.d_type = DT_LNK; break; default: dirent.d_type = DT_UNKNOWN; } } reclen = dirent.d_reclen = GENERIC_DIRSIZ(&dirent); if(pos >= startpos) /* made it to the offset yet? */ { if (uio->uio_resid < reclen) /* will it fit? */ break; strcpy( dirent.d_name,name); if (error = uiomove ((caddr_t)&dirent, dirent.d_reclen, uio)) break; } pos += reclen; if((nodenumber >1) && name_node) name_node = name_node->next; nodenumber++; } uio->uio_offset = pos; return (error); } /* */ static int devfs_readlink(struct vop_readlink_args *ap) /*struct vop_readlink_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; dn_p lnk_node; int error = 0; DBPRINT(("readlink\n")); /* set up refs to dir */ if (error = devfs_vntodn(vp,&lnk_node)) return error; if(lnk_node->type != DEV_SLNK) return(EINVAL); if (error = VOP_ACCESS(vp, VREAD, ap->a_cred, NULL)) { /* XXX */ return error; } error = uiomove(lnk_node->by.Slnk.name, lnk_node->by.Slnk.namelen, uio); return error; } #ifdef notyet static int devfs_abortop(struct vop_abortop_args *ap) /*struct vop_abortop_args { struct vnode *a_dvp; struct componentname *a_cnp; } */ { DBPRINT(("abortop\n")); if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return 0; } #endif /* notyet */ static int devfs_inactive(struct vop_inactive_args *ap) /*struct vop_inactive_args { struct vnode *a_vp; } */ { DBPRINT(("inactive\n")); return 0; } #ifdef notyet static int devfs_lock(struct vop_lock_args *ap) { DBPRINT(("lock\n")); return 0; } static int devfs_unlock( struct vop_unlock_args *ap) { DBPRINT(("unlock\n")); return 0; } static int devfs_islocked(struct vop_islocked_args *ap) /*struct vop_islocked_args { struct vnode *a_vp; } */ { DBPRINT(("islocked\n")); return 0; } static int devfs_bmap(struct vop_bmap_args *ap) /*struct vop_bmap_args { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ { DBPRINT(("bmap\n")); return 0; } static int devfs_advlock(struct vop_advlock_args *ap) /*struct vop_advlock_args { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ { DBPRINT(("advlock\n")); return EINVAL; /* we don't do locking yet */ } #endif /* notyet */ static int devfs_reclaim(struct vop_reclaim_args *ap) /*struct vop_reclaim_args { struct vnode *a_vp; } */ { dn_p file_node = NULL; int error; DBPRINT(("reclaim\n")); if (error = 
devfs_vntodn(ap->a_vp,&file_node)) { printf("devfs_vntodn returned %d ",error); return error; } ap->a_vp->v_data = NULL; if (file_node) { file_node->vn = 0; file_node->vn_id = 0; } return(0); } /* * Print out the contents of a /devfs vnode. */ static int devfs_print(struct vop_print_args *ap) /*struct vop_print_args { struct vnode *a_vp; } */ { printf("tag VT_DEVFS, devfs vnode\n"); return (0); } /**************************************************************************\ * pseudo ops * \**************************************************************************/ /* * /devfs vnode unsupported operation */ static int devfs_enotsupp(void *junk) { return (EOPNOTSUPP); } /* * /devfs "should never get here" operation */ static int devfs_badop(void *junk) { panic("devfs: bad op"); /* NOTREACHED */ } /*proto*/ void devfs_dropvnode(dn_p dnp) { struct vnode *vn_p; #ifdef PARANOID if(!dnp) { printf("devfs: dn count dropped too early\n"); } #endif vn_p = dnp->vn; /* * check if we have a vnode....... */ if((vn_p) && ( dnp->vn_id == vn_p->v_id) && (dnp == (dn_p)vn_p->v_data)) { VOP_REVOKE(vn_p, REVOKEALL); } dnp->vn = NULL; /* be pedantic about this */ } /* These are the operations used by directories etc in a devfs */ vop_t **devfs_vnodeop_p; static struct vnodeopv_entry_desc devfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) devfs_access }, { &vop_bmap_desc, (vop_t *) devfs_badop }, { &vop_getattr_desc, (vop_t *) devfs_getattr }, { &vop_inactive_desc, (vop_t *) devfs_inactive }, { &vop_link_desc, (vop_t *) devfs_link }, { &vop_lookup_desc, (vop_t *) devfs_lookup }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_print_desc, (vop_t *) devfs_print }, { &vop_read_desc, (vop_t *) devfs_read }, { &vop_readdir_desc, (vop_t *) devfs_readdir }, { &vop_readlink_desc, (vop_t *) devfs_readlink }, { &vop_reclaim_desc, (vop_t *) devfs_reclaim }, { &vop_remove_desc, (vop_t *) devfs_remove }, { &vop_rename_desc, (vop_t *) devfs_rename }, { &vop_setattr_desc, (vop_t *) devfs_setattr }, { &vop_symlink_desc, (vop_t *) devfs_symlink }, { &vop_write_desc, (vop_t *) devfs_write }, { NULL, NULL } }; static struct vnodeopv_desc devfs_vnodeop_opv_desc = { &devfs_vnodeop_p, devfs_vnodeop_entries }; VNODEOP_SET(devfs_vnodeop_opv_desc); vop_t **dev_spec_vnodeop_p; static struct vnodeopv_entry_desc dev_spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) devfs_access }, { &vop_getattr_desc, (vop_t *) devfs_getattr }, { &vop_read_desc, (vop_t *) devfs_read }, { &vop_reclaim_desc, (vop_t *) devfs_reclaim }, { &vop_setattr_desc, (vop_t *) devfs_setattr }, { &vop_symlink_desc, (vop_t *) devfs_symlink }, { &vop_write_desc, (vop_t *) devfs_write }, { NULL, NULL } }; static struct vnodeopv_desc dev_spec_vnodeop_opv_desc = { &dev_spec_vnodeop_p, dev_spec_vnodeop_entries }; VNODEOP_SET(dev_spec_vnodeop_opv_desc); Index: head/sys/miscfs/union/union_subr.c =================================================================== --- head/sys/miscfs/union/union_subr.c (revision 35822) +++ head/sys/miscfs/union/union_subr.c (revision 35823) @@ -1,1137 +1,1140 @@ /* * Copyright (c) 1994 Jan-Simon Pendry * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 - * $Id: union_subr.c,v 1.28 1998/02/10 03:32:05 kato Exp $ + * $Id: union_subr.c,v 1.29 1998/02/26 03:23:54 kato Exp $ */ #include #include #include #include #include #include #include #include #include #include #include /* for vnode_pager_setsize */ #include #include #include extern int union_init __P((void)); /* must be power of two, otherwise change UNION_HASH() */ #define NHASH 32 /* unsigned int ... 
*/ #define UNION_HASH(u, l) \ (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1)) static LIST_HEAD(unhead, union_node) unhead[NHASH]; static int unvplock[NHASH]; static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp, int *cntp)); static int union_list_lock __P((int ix)); static void union_list_unlock __P((int ix)); static int union_relookup __P((struct union_mount *um, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct componentname *cn, char *path, int pathlen)); static void union_updatevp __P((struct union_node *un, struct vnode *uppervp, struct vnode *lowervp)); static void union_newlower __P((struct union_node *, struct vnode *)); static void union_newupper __P((struct union_node *, struct vnode *)); static int union_copyfile __P((struct vnode *, struct vnode *, struct ucred *, struct proc *)); static int union_vn_create __P((struct vnode **, struct union_node *, struct proc *)); static int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *)); int union_init() { int i; for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); bzero((caddr_t) unvplock, sizeof(unvplock)); return (0); } static int union_list_lock(ix) int ix; { if (unvplock[ix] & UN_LOCKED) { unvplock[ix] |= UN_WANT; (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); return (1); } unvplock[ix] |= UN_LOCKED; return (0); } static void union_list_unlock(ix) int ix; { unvplock[ix] &= ~UN_LOCKED; if (unvplock[ix] & UN_WANT) { unvplock[ix] &= ~UN_WANT; wakeup((caddr_t) &unvplock[ix]); } } static void union_updatevp(un, uppervp, lowervp) struct union_node *un; struct vnode *uppervp; struct vnode *lowervp; { int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); int nhash = UNION_HASH(uppervp, lowervp); int docache = (lowervp != NULLVP || uppervp != NULLVP); int lhash, uhash; /* * Ensure locking is ordered from lower to higher * to avoid deadlocks. */ if (nhash < ohash) { lhash = nhash; uhash = ohash; } else { lhash = ohash; uhash = nhash; } if (lhash != uhash) while (union_list_lock(lhash)) continue; while (union_list_lock(uhash)) continue; if (ohash != nhash || !docache) { if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } } if (ohash != nhash) union_list_unlock(ohash); if (un->un_lowervp != lowervp) { if (un->un_lowervp) { vrele(un->un_lowervp); if (un->un_path) { free(un->un_path, M_TEMP); un->un_path = 0; } if (un->un_dirvp) { vrele(un->un_dirvp); un->un_dirvp = NULLVP; } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; } if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } if (docache && (ohash != nhash)) { LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); un->un_flags |= UN_CACHED; } union_list_unlock(nhash); } static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { union_updatevp(un, un->un_uppervp, lowervp); } static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { union_updatevp(un, uppervp, un->un_lowervp); } /* * Keep track of size changes in the underlying vnodes. * If the size changes, then callback to the vm layer * giving priority to the upper layer size. 
*/ void union_newsize(vp, uppersz, lowersz) struct vnode *vp; off_t uppersz, lowersz; { struct union_node *un; off_t sz; /* only interested in regular files */ if (vp->v_type != VREG) return; un = VTOUNION(vp); sz = VNOVAL; if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { un->un_uppersz = uppersz; if (sz == VNOVAL) sz = un->un_uppersz; } if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { un->un_lowersz = lowersz; if (sz == VNOVAL) sz = un->un_lowersz; } if (sz != VNOVAL) { #ifdef UNION_DIAGNOSTIC printf("union: %s size now %ld\n", uppersz != VNOVAL ? "upper" : "lower", (long) sz); #endif vnode_pager_setsize(vp, sz); } } /* * allocate a union_node/vnode pair. the vnode is * referenced and locked. the new vnode is returned * via (vpp). (mp) is the mountpoint of the union filesystem, * (dvp) is the parent directory where the upper layer object * should exist (but doesn't) and (cnp) is the componentname * information which is partially copied to allow the upper * layer object to be created at a later time. (uppervp) * and (lowervp) reference the upper and lower layer objects * being mapped. either, but not both, can be nil. * if supplied, (uppervp) is locked. * the reference is either maintained in the new union_node * object which is allocated, or they are vrele'd. * * all union_nodes are maintained on a singly-linked * list. new nodes are only allocated when they cannot * be found on this list. entries on the list are * removed when the vfs reclaim entry is called. * * a single lock is kept for the entire list. this is * needed because the getnewvnode() function can block * waiting for a vnode to become free, in which case there * may be more than one process trying to get the same * vnode. this lock is only taken if we are going to * call getnewvnode, since the kernel itself is single-threaded. * * if an entry is found on the list, then call vget() to * take a reference. this is done because there may be * zero references to it and so it needs to removed from * the vnode free list. 
*/ int union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) struct vnode **vpp; struct mount *mp; struct vnode *undvp; /* parent union vnode */ struct vnode *dvp; /* may be null */ struct componentname *cnp; /* may be null */ struct vnode *uppervp; /* may be null */ struct vnode *lowervp; /* may be null */ int docache; { int error; struct union_node *un = 0; struct vnode *xlowervp = NULLVP; struct union_mount *um = MOUNTTOUNIONMOUNT(mp); int hash; int vflag; int try; int klocked; if (uppervp == NULLVP && lowervp == NULLVP) panic("union: unidentifiable allocation"); if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { xlowervp = lowervp; lowervp = NULLVP; } /* detect the root vnode (and aliases) */ vflag = 0; if ((uppervp == um->um_uppervp) && ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { if (lowervp == NULLVP) { lowervp = um->um_lowervp; if (lowervp != NULLVP) VREF(lowervp); } vflag = VROOT; } loop: if (!docache) { un = 0; } else for (try = 0; try < 3; try++) { switch (try) { case 0: if (lowervp == NULLVP) continue; hash = UNION_HASH(uppervp, lowervp); break; case 1: if (uppervp == NULLVP) continue; hash = UNION_HASH(uppervp, NULLVP); break; case 2: if (lowervp == NULLVP) continue; hash = UNION_HASH(NULLVP, lowervp); break; } while (union_list_lock(hash)) continue; for (un = unhead[hash].lh_first; un != 0; un = un->un_cache.le_next) { if ((un->un_lowervp == lowervp || un->un_lowervp == NULLVP) && (un->un_uppervp == uppervp || un->un_uppervp == NULLVP) && (UNIONTOV(un)->v_mount == mp)) { if (vget(UNIONTOV(un), 0, cnp ? cnp->cn_proc : NULL)) { union_list_unlock(hash); goto loop; } break; } } union_list_unlock(hash); if (un) break; } if (un) { /* * Obtain a lock on the union_node. * uppervp is locked, though un->un_uppervp * may not be. this doesn't break the locking * hierarchy since in the case that un->un_uppervp * is not yet locked it will be vrele'd and replaced * with uppervp. */ if ((dvp != NULLVP) && (uppervp == dvp)) { /* * Access ``.'', so (un) will already * be locked. Since this process has * the lock on (uppervp) no other * process can hold the lock on (un). */ #ifdef DIAGNOSTIC if ((un->un_flags & UN_LOCKED) == 0) panic("union: . not locked"); else if (curproc && un->un_pid != curproc->p_pid && un->un_pid > -1 && curproc->p_pid > -1) panic("union: allocvp not lock owner"); #endif } else { if (un->un_flags & UN_LOCKED) { vrele(UNIONTOV(un)); un->un_flags |= UN_WANT; (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); goto loop; } un->un_flags |= UN_LOCKED; #ifdef DIAGNOSTIC if (curproc) un->un_pid = curproc->p_pid; else un->un_pid = -1; #endif } /* * At this point, the union_node is locked, * un->un_uppervp may not be locked, and uppervp * is locked or nil. */ /* * Save information about the upper layer. */ if (uppervp != un->un_uppervp) { union_newupper(un, uppervp); } else if (uppervp) { vrele(uppervp); } if (un->un_uppervp) { un->un_flags |= UN_ULOCK; un->un_flags &= ~UN_KLOCK; } /* * Save information about the lower layer. * This needs to keep track of pathname * and directory information which union_vn_create * might need. 
*/ if (lowervp != un->un_lowervp) { union_newlower(un, lowervp); if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; VREF(dvp); un->un_dirvp = dvp; } } else if (lowervp) { vrele(lowervp); } *vpp = UNIONTOV(un); return (0); } if (docache) { /* * otherwise lock the vp list while we call getnewvnode * since that can block. */ hash = UNION_HASH(uppervp, lowervp); if (union_list_lock(hash)) goto loop; } error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); if (error) { if (uppervp) { if (dvp == uppervp) vrele(uppervp); else vput(uppervp); } if (lowervp) vrele(lowervp); goto out; } MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), M_TEMP, M_WAITOK); (*vpp)->v_flag |= vflag; if (uppervp) (*vpp)->v_type = uppervp->v_type; else (*vpp)->v_type = lowervp->v_type; un = VTOUNION(*vpp); un->un_vnode = *vpp; un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; un->un_pvp = undvp; if (undvp != NULLVP) VREF(undvp); un->un_dircache = 0; un->un_openl = 0; un->un_flags = UN_LOCKED; if (un->un_uppervp) un->un_flags |= UN_ULOCK; #ifdef DIAGNOSTIC if (curproc) un->un_pid = curproc->p_pid; else un->un_pid = -1; #endif if (cnp && (lowervp != NULLVP)) { un->un_hash = cnp->cn_hash; un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); un->un_path[cnp->cn_namelen] = '\0'; VREF(dvp); un->un_dirvp = dvp; } else { un->un_hash = 0; un->un_path = 0; un->un_dirvp = 0; } if (docache) { LIST_INSERT_HEAD(&unhead[hash], un, un_cache); un->un_flags |= UN_CACHED; } if (xlowervp) vrele(xlowervp); out: if (docache) union_list_unlock(hash); return (error); } int union_freevp(vp) struct vnode *vp; { struct union_node *un = VTOUNION(vp); if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } if (un->un_pvp != NULLVP) vrele(un->un_pvp); if (un->un_uppervp != NULLVP) vrele(un->un_uppervp); if (un->un_lowervp != NULLVP) vrele(un->un_lowervp); if (un->un_dirvp != NULLVP) vrele(un->un_dirvp); if (un->un_path) free(un->un_path, M_TEMP); FREE(vp->v_data, M_TEMP); vp->v_data = 0; return (0); } /* * copyfile. copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. both (fvp) * and (tvp) are locked on entry and exit. */ static int union_copyfile(fvp, tvp, cred, p) struct vnode *fvp; struct vnode *tvp; struct ucred *cred; struct proc *p; { char *buf; struct uio uio; struct iovec iov; int error = 0; /* * strategy: * allocate a buffer of size MAXBSIZE. * loop doing reads and writes, keeping track * of the current uio offset. * give up at the first sign of trouble. */ uio.uio_procp = p; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; VOP_UNLOCK(fvp, 0, p); /* XXX */ VOP_LEASE(fvp, p, cred, LEASE_READ); vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VOP_UNLOCK(tvp, 0, p); /* XXX */ VOP_LEASE(tvp, p, cred, LEASE_WRITE); vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... 
*/ do { off_t offset = uio.uio_offset; uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; error = VOP_READ(fvp, &uio, 0, cred); if (error == 0) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE - uio.uio_resid; uio.uio_offset = offset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; if (uio.uio_resid == 0) break; do { error = VOP_WRITE(tvp, &uio, 0, cred); } while ((uio.uio_resid > 0) && (error == 0)); } } while (error == 0); free(buf, M_TEMP); return (error); } /* * (un) is assumed to be locked on entry and remains * locked on exit. */ int union_copyup(un, docopy, cred, p) struct union_node *un; int docopy; struct ucred *cred; struct proc *p; { int error; struct vnode *lvp, *uvp; /* * If the user does not have read permission, the vnode should not * be copied to upper layer. */ vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p); VOP_UNLOCK(un->un_lowervp, 0, p); if (error) return (error); error = union_vn_create(&uvp, un, p); if (error) return (error); /* at this point, uppervp is locked */ union_newupper(un, uvp); un->un_flags |= UN_ULOCK; lvp = un->un_lowervp; if (docopy) { /* * XX - should not ignore errors * from VOP_CLOSE */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(lvp, FREAD, cred, p); if (error == 0) { error = union_copyfile(lvp, uvp, cred, p); VOP_UNLOCK(lvp, 0, p); (void) VOP_CLOSE(lvp, FREAD, cred, p); } #ifdef UNION_DIAGNOSTIC if (error == 0) uprintf("union: copied up %s\n", un->un_path); #endif } un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(uvp, 0, p); union_vn_close(uvp, FWRITE, cred, p); vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the * upper vnode to ensure that the filesystem keeps * its references counts right. This doesn't do * the right thing with (cred) and (FREAD) though. * Ignoring error returns is not right, either. */ if (error == 0) { int i; for (i = 0; i < un->un_openl; i++) { (void) VOP_CLOSE(lvp, FREAD, cred, p); (void) VOP_OPEN(uvp, FREAD, cred, p); } un->un_openl = 0; } return (error); } static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; struct componentname *cn; char *path; int pathlen; { int error; /* * A new componentname structure must be faked up because * there is no way to know where the upper level cnp came * from or what it is being used for. This must duplicate * some of the work done by NDINIT, some of the work done * by namei, some of the work done by lookup and some of * the work done by VOP_LOOKUP when given a CREATE flag. * Conclusion: Horrible. * * The pathname buffer will be FREEed by VOP_MKDIR. */ cn->cn_namelen = pathlen; cn->cn_pnbuf = zalloc(namei_zone); bcopy(path, cn->cn_pnbuf, cn->cn_namelen); cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); cn->cn_proc = cnp->cn_proc; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; else cn->cn_cred = um->um_cred; cn->cn_nameptr = cn->cn_pnbuf; cn->cn_hash = cnp->cn_hash; cn->cn_consume = cnp->cn_consume; VREF(dvp); error = relookup(dvp, vpp, cn); if (!error) vrele(dvp); else { zfree(namei_zone, cn->cn_pnbuf); cn->cn_pnbuf = '\0'; } return (error); } /* * Create a shadow directory in the upper layer. 
* The new vnode is returned locked. * * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the shadow directory. * it is unlocked on entry and exit. * (cnp) is the componentname to be created. * (vpp) is the returned newly created shadow directory, which * is returned locked. */ int union_mkshadow(um, dvp, cnp, vpp) struct union_mount *um; struct vnode *dvp; struct componentname *cnp; struct vnode **vpp; { int error; struct vattr va; struct proc *p = cnp->cn_proc; struct componentname cn; error = union_relookup(um, dvp, vpp, cnp, &cn, cnp->cn_nameptr, cnp->cn_namelen); if (error) return (error); if (*vpp) { VOP_ABORTOP(dvp, &cn); VOP_UNLOCK(dvp, 0, p); vrele(*vpp); *vpp = NULLVP; return (EEXIST); } /* * policy: when creating the shadow directory in the * upper layer, create it owned by the user who did * the mount, group from parent directory, and mode * 777 modified by umask (ie mostly identical to the * mkdir syscall). (jsp, kb) */ VATTR_NULL(&va); va.va_type = VDIR; va.va_mode = um->um_cmode; /* VOP_LEASE: dvp is locked */ VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); + vput(dvp); return (error); } /* * Create a whiteout entry in the upper layer. * * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. * it is locked on entry and exit. * (cnp) is the componentname to be created. */ int union_mkwhiteout(um, dvp, cnp, path) struct union_mount *um; struct vnode *dvp; struct componentname *cnp; char *path; { int error; struct proc *p = cnp->cn_proc; struct vnode *wvp; struct componentname cn; VOP_UNLOCK(dvp, 0, p); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); if (error) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (wvp) { VOP_ABORTOP(dvp, &cn); vrele(dvp); vrele(wvp); return (EEXIST); } /* VOP_LEASE: dvp is locked */ VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_WHITEOUT(dvp, &cn, CREATE); if (error) VOP_ABORTOP(dvp, &cn); vrele(dvp); return (error); } /* * union_vn_create: creates and opens a new shadow file * on the upper union layer. this function is similar * in spirit to calling vn_open but it avoids calling namei(). * the problem with calling namei is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup is told where to start. */ static int union_vn_create(vpp, un, p) struct vnode **vpp; struct union_node *un; struct proc *p; { struct vnode *vp; struct ucred *cred = p->p_ucred; struct vattr vat; struct vattr *vap = &vat; int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); int error; int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; struct componentname cn; *vpp = NULLVP; /* * Build a new componentname structure (for the same * reasons outlines in union_mkshadow). * The difference here is that the file is owned by * the current user, rather than by the person who * did the mount, since the current user needs to be * able to write the file (that's why it is being * copied in the first place). 
*/ cn.cn_namelen = strlen(un->un_path); cn.cn_pnbuf = zalloc(namei_zone); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); cn.cn_proc = p; cn.cn_cred = p->p_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_hash = un->un_hash; cn.cn_consume = 0; VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); vrele(un->un_dirvp); if (vp) { VOP_ABORTOP(un->un_dirvp, &cn); if (un->un_dirvp == vp) vrele(un->un_dirvp); else vput(un->un_dirvp); vrele(vp); return (EEXIST); } /* * Good - there was no race to create the file * so go ahead and create it. The permissions * on the file will be 0666 modified by the * current user's umask. Access to the file, while * it is unioned, will require access to the top *and* * bottom files. Access when not unioned will simply * require access to the top-level file. * TODO: confirm choice of access permissions. */ VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE); - if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) + error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); + vput(un->un_dirvp); + if (error) return (error); error = VOP_OPEN(vp, fmode, cred, p); if (error) { vput(vp); return (error); } vp->v_writecount++; *vpp = vp; return (0); } static int union_vn_close(vp, fmode, cred, p) struct vnode *vp; int fmode; struct ucred *cred; struct proc *p; { if (fmode & FWRITE) --vp->v_writecount; return (VOP_CLOSE(vp, fmode, cred, p)); } void union_removed_upper(un) struct union_node *un; { struct proc *p = curproc; /* XXX */ struct vnode **vpp; /* * Do not set the uppervp to NULLVP. If lowervp is NULLVP, * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referrenced. */ #if 0 union_newupper(un, NULLVP); #endif if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); free(un->un_dircache, M_TEMP); un->un_dircache = 0; } if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } if (un->un_flags & UN_ULOCK) { un->un_flags &= ~UN_ULOCK; VOP_UNLOCK(un->un_uppervp, 0, p); } } #if 0 struct vnode * union_lowervp(vp) struct vnode *vp; { struct union_node *un = VTOUNION(vp); if ((un->un_lowervp != NULLVP) && (vp->v_type == un->un_lowervp->v_type)) { if (vget(un->un_lowervp, 0) == 0) return (un->un_lowervp); } return (NULLVP); } #endif /* * determine whether a whiteout is needed * during a remove/rmdir operation. 
*/ int union_dowhiteout(un, cred, p) struct union_node *un; struct ucred *cred; struct proc *p; { struct vattr va; if (un->un_lowervp != NULLVP) return (1); if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 && (va.va_flags & OPAQUE)) return (1); return (0); } static void union_dircache_r(vp, vppp, cntp) struct vnode *vp; struct vnode ***vppp; int *cntp; { struct union_node *un; if (vp->v_op != union_vnodeop_p) { if (vppp) { VREF(vp); *(*vppp)++ = vp; if (--(*cntp) == 0) panic("union: dircache table too small"); } else { (*cntp)++; } return; } un = VTOUNION(vp); if (un->un_uppervp != NULLVP) union_dircache_r(un->un_uppervp, vppp, cntp); if (un->un_lowervp != NULLVP) union_dircache_r(un->un_lowervp, vppp, cntp); } struct vnode * union_dircache(vp, p) struct vnode *vp; struct proc *p; { int cnt; struct vnode *nvp; struct vnode **vpp; struct vnode **dircache; struct union_node *un; int error; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); dircache = VTOUNION(vp)->un_dircache; nvp = NULLVP; if (dircache == 0) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; dircache = (struct vnode **) malloc(cnt * sizeof(struct vnode *), M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; vpp = dircache + 1; } else { vpp = dircache; do { if (*vpp++ == VTOUNION(vp)->un_uppervp) break; } while (*vpp != NULLVP); } if (*vpp == NULLVP) goto out; vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); VREF(*vpp); error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); if (error) goto out; VTOUNION(vp)->un_dircache = 0; un = VTOUNION(nvp); un->un_dircache = dircache; out: VOP_UNLOCK(vp, 0, p); return (nvp); } Index: head/sys/miscfs/union/union_vnops.c =================================================================== --- head/sys/miscfs/union/union_vnops.c (revision 35822) +++ head/sys/miscfs/union/union_vnops.c (revision 35823) @@ -1,1798 +1,1799 @@ /* * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. * Copyright (c) 1992, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95 - * $Id: union_vnops.c,v 1.55 1998/02/26 03:23:56 kato Exp $ + * $Id: union_vnops.c,v 1.56 1998/03/17 08:47:50 kato Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define FIXUP(un, p) { \ if (((un)->un_flags & UN_ULOCK) == 0) { \ union_fixup(un, p); \ } \ } static int union_abortop __P((struct vop_abortop_args *ap)); static int union_access __P((struct vop_access_args *ap)); static int union_advlock __P((struct vop_advlock_args *ap)); static int union_bmap __P((struct vop_bmap_args *ap)); static int union_close __P((struct vop_close_args *ap)); static int union_create __P((struct vop_create_args *ap)); static void union_fixup __P((struct union_node *un, struct proc *p)); static int union_fsync __P((struct vop_fsync_args *ap)); static int union_getattr __P((struct vop_getattr_args *ap)); static int union_inactive __P((struct vop_inactive_args *ap)); static int union_ioctl __P((struct vop_ioctl_args *ap)); static int union_islocked __P((struct vop_islocked_args *ap)); static int union_lease __P((struct vop_lease_args *ap)); static int union_link __P((struct vop_link_args *ap)); static int union_lock __P((struct vop_lock_args *ap)); static int union_lookup __P((struct vop_lookup_args *ap)); static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, struct vnode **vpp, struct componentname *cnp)); static int union_mkdir __P((struct vop_mkdir_args *ap)); static int union_mknod __P((struct vop_mknod_args *ap)); static int union_mmap __P((struct vop_mmap_args *ap)); static int union_open __P((struct vop_open_args *ap)); static int union_pathconf __P((struct vop_pathconf_args *ap)); static int union_print __P((struct vop_print_args *ap)); static int union_read __P((struct vop_read_args *ap)); static int union_readdir __P((struct vop_readdir_args *ap)); static int union_readlink __P((struct vop_readlink_args *ap)); static int union_reclaim __P((struct vop_reclaim_args *ap)); static int union_remove __P((struct vop_remove_args *ap)); static int union_rename __P((struct vop_rename_args *ap)); static int union_revoke __P((struct vop_revoke_args *ap)); static int union_rmdir __P((struct vop_rmdir_args *ap)); static int union_poll __P((struct vop_poll_args *ap)); static int union_setattr __P((struct vop_setattr_args *ap)); static int union_strategy __P((struct vop_strategy_args *ap)); static int union_symlink __P((struct vop_symlink_args *ap)); static int union_unlock __P((struct vop_unlock_args *ap)); static int union_whiteout __P((struct vop_whiteout_args *ap)); static int union_write __P((struct vop_read_args *ap)); static void union_fixup(un, p) struct union_node *un; struct proc *p; { vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); un->un_flags |= UN_ULOCK; } static int union_lookup1(udvp, dvpp, vpp, cnp) struct vnode *udvp; struct vnode **dvpp; struct vnode **vpp; struct componentname *cnp; { int error; struct proc *p = cnp->cn_proc; struct vnode 
*tdvp; struct vnode *dvp; struct mount *mp; dvp = *dvpp; /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what * lookup would do by stepping back down the mount * hierarchy. */ if (cnp->cn_flags & ISDOTDOT) { while ((dvp != udvp) && (dvp->v_flag & VROOT)) { /* * Don't do the NOCROSSMOUNT check * at this level. By definition, * union fs deals with namespaces, not * filesystems. */ tdvp = dvp; *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; vput(tdvp); VREF(dvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } } error = VOP_LOOKUP(dvp, &tdvp, cnp); if (error) return (error); /* * The parent directory will have been unlocked, unless lookup * found the last component. In which case, re-lock the node * here to allow it to be unlocked again (phew) in union_lookup. */ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); dvp = tdvp; /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). */ while (dvp != udvp && (dvp->v_type == VDIR) && (mp = dvp->v_mountedhere)) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdvp); vfs_unbusy(mp, p); if (error) { vput(dvp); return (error); } vput(dvp); dvp = tdvp; } *vpp = dvp; return (0); } static int union_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { int error; int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; struct vnode *dvp = ap->a_dvp; struct union_node *dun = VTOUNION(dvp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; int lockparent = cnp->cn_flags & LOCKPARENT; struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); struct ucred *saved_cred; int iswhiteout; struct vattr va; /* * Disallow write attemps to the filesystem mounted read-only. */ if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); #ifdef notyet if (cnp->cn_namelen == 3 && cnp->cn_nameptr[2] == '.' && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') { dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); if (dvp == NULLVP) return (ENOENT); VREF(dvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); if (!lockparent || !(cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(ap->a_dvp, 0, p); return (0); } #endif cnp->cn_flags |= LOCKPARENT; upperdvp = dun->un_uppervp; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; if (cnp->cn_flags & ISDOTDOT) { if (upperdvp != NULL) VREF(upperdvp); if (lowerdvp != NULL) VREF(lowerdvp); } /* * do the lookup in the upper level. * if that level comsumes additional pathnames, * then assume that something special is going * on and just return that vnode. */ if (upperdvp != NULLVP) { FIXUP(dun, p); /* * If we're doing `..' in the underlying filesystem, * we must drop our lock on the union node before * going up the tree in the lower file system--if we block * on the lowervp lock, and that's held by someone else * coming down the tree and who's waiting for our lock, * we would be hosed. */ if (cnp->cn_flags & ISDOTDOT) { /* retain lock on underlying VP: */ dun->un_flags |= UN_KLOCK; VOP_UNLOCK(dvp, 0, p); } uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); /* * Disallow write attemps to the filesystem mounted read-only. 
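 *
 * (An aside on the lock handoff used just above, and again in the
 * create/mknod/remove/link hunks later in this file: when a routine
 * needs to keep the upper vnode locked while giving up the lock on
 * the union vnode itself, it sets UN_KLOCK first so that
 * union_unlock() leaves the upper vnode alone.  A condensed sketch
 * of the idiom, not the literal code, since the individual callers
 * differ in how they restore state afterwards:
 *
 *	dun->un_flags |= UN_KLOCK;      keep uppervp locked
 *	VOP_UNLOCK(dvp, 0, p);          only the union lock is dropped
 *	... operate on the still-locked upper vnode ...
 *	and afterwards either set UN_ULOCK again or re-take the union
 *	lock with vn_lock(), depending on the caller.)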
*/ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; return (EROFS); } if (cnp->cn_flags & ISDOTDOT) { if (dun->un_uppervp == upperdvp) { /* * We got the underlying bugger back locked... * now take back the union node lock. Since we * hold the uppervp lock, we can diddle union * locking flags at will. :) */ dun->un_flags |= UN_ULOCK; } /* * If upperdvp got swapped out, it means we did * some mount point magic, and we do not have * dun->un_uppervp locked currently--so we get it * locked here (don't set the UN_ULOCK flag). */ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); } /*if (uppervp == upperdvp) dun->un_flags |= UN_KLOCK;*/ if (cnp->cn_consume != 0) { *ap->a_vpp = uppervp; if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; error = uerror; goto out; } if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; } else if (lowerdvp != NULLVP) { lerror = VOP_GETATTR(upperdvp, &va, cnp->cn_cred, cnp->cn_proc); if (lerror == 0 && (va.va_flags & OPAQUE)) iswhiteout = 1; } } } else { uerror = ENOENT; } /* * in a similar way to the upper layer, do the lookup * in the lower layer. this time, if there is some * component magic going on, then vput whatever we got * back from the upper layer and return the lower vnode * instead. */ if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); /* * Only do a LOOKUP on the bottom node, since * we won't be making changes to it anyway. */ nameiop = cnp->cn_nameiop; cnp->cn_nameiop = LOOKUP; if (um->um_op == UNMNT_BELOW) { saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. */ lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; if (lowervp != lowerdvp) VOP_UNLOCK(lowerdvp, 0, p); if (cnp->cn_consume != 0 || lerror == EACCES) { if (lerror == EACCES) lowervp = NULLVP; if (uppervp != NULLVP) { if (uppervp == upperdvp) vrele(uppervp); else vput(uppervp); uppervp = NULLVP; } *ap->a_vpp = lowervp; if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; error = lerror; goto out; } } else { lerror = ENOENT; if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { lowervp = LOWERVP(dun->un_pvp); if (lowervp != NULLVP) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); lerror = 0; } } } if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; /* * at this point, we have uerror and lerror indicating * possible errors with the lookups in the upper and lower * layers. additionally, uppervp and lowervp are (locked) * references to existing vnodes in the upper and lower layers. * * there are now three cases to consider. * 1. if both layers returned an error, then return whatever * error the upper layer generated. * * 2. if the top layer failed and the bottom layer succeeded * then two subcases occur. * a. the bottom vnode is not a directory, in which * case just return a new union vnode referencing * an empty top layer and the existing bottom layer. * b. the bottom vnode is a directory, in which case * create a new directory in the top-level and * continue as in case 3. * * 3. 
if the top layer succeeded then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ *ap->a_vpp = NULLVP; /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; goto out; } /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. */ dun->un_flags &= ~UN_ULOCK; VOP_UNLOCK(upperdvp, 0, p); uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); dun->un_flags |= UN_ULOCK; if (uerror) { if (lowervp != NULLVP) { vput(lowervp); lowervp = NULLVP; } error = uerror; goto out; } } } if (lowervp != NULLVP) VOP_UNLOCK(lowervp, 0, p); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); if (error) { if (uppervp != NULLVP) vput(uppervp); if (lowervp != NULLVP) vrele(lowervp); } else { if (*ap->a_vpp != dvp) if (!lockparent || !(cnp->cn_flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); #ifdef DIAGNOSTIC if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && *ap->a_vpp != dvp) { panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); } #endif } out: if (cnp->cn_flags & ISDOTDOT) { if (upperdvp != NULL) vrele(upperdvp); if (lowerdvp != NULL) vrele(lowerdvp); } return (error); } static int union_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { - struct union_node *un = VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; struct mount *mp; + int error; - FIXUP(un, p); + FIXUP(dun, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; - mp = ap->a_dvp->v_mount; - vput(ap->a_dvp); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); - if (error) + if (error) { + dun->un_flags |= UN_ULOCK; return (error); + } + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(dvp, 0, p); error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); if (error) vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); return (error); } - vput(ap->a_dvp); return (EROFS); } static int union_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (un->un_uppervp == NULLVP) return (EOPNOTSUPP); FIXUP(un, p); return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); } static int union_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { - struct union_node *un = VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; struct mount *mp; + int error; - FIXUP(un, p); + FIXUP(dun, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; - mp = ap->a_dvp->v_mount; - vput(ap->a_dvp); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); - if (error) + if (error) { + dun->un_flags |= UN_ULOCK; return (error); + } if (vp != NULLVP) { + mp = ap->a_dvp->v_mount; + 
VOP_UNLOCK(dvp, 0, p); error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); if (error) vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); + } else { + dun->un_flags |= UN_ULOCK; } return (error); } - vput(ap->a_dvp); return (EROFS); } static int union_open(ap) struct vop_open_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *tvp; int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; int error; /* * If there is an existing upper vp then simply open that. */ tvp = un->un_uppervp; if (tvp == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, * otherwise can simply open the lower vnode. */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); if (error == 0) error = VOP_OPEN(un->un_uppervp, mode, cred, p); return (error); } /* * Just open the lower vnode */ un->un_openl++; vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(tvp, mode, cred, p); VOP_UNLOCK(tvp, 0, p); return (error); } FIXUP(un, p); error = VOP_OPEN(tvp, mode, cred, p); return (error); } static int union_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp; if ((vp = un->un_uppervp) == NULLVP) { #ifdef UNION_DIAGNOSTIC if (un->un_openl <= 0) panic("union: un_openl cnt"); #endif --un->un_openl; vp = un->un_lowervp; } ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } /* * Check access permission on the union vnode. * The access check being enforced is to check * against both the underlying vnode, and any * copied vnode. This ensures that no additional * file permissions are given away simply because * the user caused an implicit file copy. */ static int union_access(ap) struct vop_access_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; int error = EACCES; struct vnode *vp; struct vnode *savedvp; /* * Disallow write attempts on filesystems mounted read-only. 
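 *
 * (For orientation, the shape of the check below, condensed into a
 * sketch rather than the exact control flow: if an upper vnode
 * exists the request is simply forwarded to it,
 *
 *	ap->a_vp = un->un_uppervp;
 *	return (VCALL(ap->a_vp, VOFFSET(vop_access), ap));
 *
 * otherwise the lower vnode is checked with the caller's credentials
 * and, for UNMNT_BELOW mounts, checked a second time with the mount's
 * um_cred, so a later implicit copy-up cannot widen what the caller
 * was originally allowed to do.)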
*/ if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } if ((vp = un->un_uppervp) != NULLVP) { FIXUP(un, p); ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_access), ap)); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); savedvp = ap->a_vp; ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; error = VCALL(vp, VOFFSET(vop_access), ap); } } VOP_UNLOCK(vp, 0, p); if (error) return (error); } return (error); } /* * We handle getattr only to change the fsid and * track object sizes */ static int union_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { int error; struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp = un->un_uppervp; struct proc *p = ap->a_p; struct vattr *vap; struct vattr va; /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. * This means the link count on directories needs to be "correct". * The only way to do that is to call getattr on both layers * and fix up the link count. The link count will not necessarily * be accurate but will be large enough to defeat the tree walkers. */ vap = ap->a_vap; vp = un->un_uppervp; if (vp != NULLVP) { /* * It's not clear whether VOP_GETATTR is to be * called with the vnode locked or not. stat() calls * it with (vp) locked, and fstat calls it with * (vp) unlocked. * In the mean time, compensate here by checking * the union_node's lock flag. */ if (un->un_flags & UN_LOCKED) FIXUP(un, p); error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); union_newsize(ap->a_vp, vap->va_size, VNOVAL); } if (vp == NULLVP) { vp = un->un_lowervp; } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) { vp = un->un_lowervp; vap = &va; } else { vp = NULLVP; } if (vp != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); if (error) return (error); union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; return (0); } static int union_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; struct vattr *vap = ap->a_vap; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) return (EROFS); /* * Handle case of truncating lower object to zero size, * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT. */ if ((un->un_uppervp == NULLVP) && /* assert(un->un_lowervp != NULLVP) */ (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), ap->a_cred, ap->a_p); if (error) return (error); } /* * Try to set attributes in upper layer, * otherwise return read-only filesystem error. 
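 *
 * (The copy-up decision made a few lines above, condensed as an
 * illustration of intent rather than the exact statement:
 *
 *	if (un->un_uppervp == NULLVP && un->un_lowervp->v_type == VREG)
 *		error = union_copyup(un, (vap->va_size != 0), cred, p);
 *
 * that is, a regular file present only in the lower layer gets an
 * upper shadow first, copying its contents unless the new size is 0;
 * every other attribute change needs an existing upper vnode or the
 * EROFS below is returned.)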
*/ if (un->un_uppervp != NULLVP) { FIXUP(un, p); error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_p); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); } else { error = EROFS; } return (error); } static int union_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error; struct proc *p = ap->a_uio->uio_procp; struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); if (dolock) VOP_UNLOCK(vp, 0, p); /* * XXX * perhaps the size of the underlying object has changed under * our feet. take advantage of the offset information present * in the uio structure. */ if (error == 0) { struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; if (vp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { if (cur > un->un_lowersz) union_newsize(ap->a_vp, VNOVAL, cur); } } return (error); } static int union_write(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error; struct vnode *vp; struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_uio->uio_procp; vp = UPPERVP(ap->a_vp); if (vp == NULLVP) panic("union: missing upper layer in write"); FIXUP(un, p); error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * the size of the underlying object may be changed by the * write. */ if (error == 0) { off_t cur = ap->a_uio->uio_offset; if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } return (error); } static int union_lease(ap) struct vop_lease_args /* { struct vnode *a_vp; struct proc *a_p; struct ucred *a_cred; int a_flag; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); } static int union_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); } static int union_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); } static int union_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; if (UPPERVP(vp)) VOP_REVOKE(UPPERVP(vp), ap->a_flags); if (LOWERVP(vp)) VOP_REVOKE(LOWERVP(vp), ap->a_flags); vgone(vp); return (0); } static int union_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_mmap), ap)); } static int union_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { int error = 0; struct proc *p = ap->a_p; struct vnode *targetvp = OTHERVP(ap->a_vp); struct union_node *un; if (targetvp != NULLVP) { int dolock = (targetvp == LOWERVP(ap->a_vp)); un = VTOUNION(ap->a_vp); if (dolock) vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); else { un = VTOUNION(ap->a_vp); if ((un->un_flags & 
UN_ULOCK) == 0 && targetvp->v_data != NULL && ((struct lock *)targetvp->v_data)->lk_lockholder == curproc->p_pid && VOP_ISLOCKED(targetvp) != 0) return 0; /* XXX */ FIXUP(un, p); } error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); if (dolock) VOP_UNLOCK(targetvp, 0, p); } return (error); } static int union_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + int error; if (dun->un_uppervp == NULLVP) panic("union remove: null upper vnode"); if (un->un_uppervp != NULLVP) { struct vnode *dvp = dun->un_uppervp; struct vnode *vp = un->un_uppervp; FIXUP(dun, p); - VREF(dvp); dun->un_flags |= UN_KLOCK; - vput(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p); FIXUP(un, p); - VREF(vp); un->un_flags |= UN_KLOCK; - vput(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); - if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc)) + if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; error = VOP_REMOVE(dvp, vp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif + dun->un_flags |= UN_ULOCK; + un->un_flags |= UN_ULOCK; } else { FIXUP(dun, p); error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); - vput(ap->a_dvp); - vput(ap->a_vp); } return (error); } static int union_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error = 0; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; - struct union_node *un; + struct union_node *dun = VTOUNION(ap->a_tdvp); struct vnode *vp; struct vnode *tdvp; + int error = 0; - un = VTOUNION(ap->a_tdvp); if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); if (tun->un_uppervp == NULLVP) { vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); - if (un->un_uppervp == tun->un_dirvp) { - un->un_flags &= ~UN_ULOCK; - VOP_UNLOCK(un->un_uppervp, 0, p); + if (dun->un_uppervp == tun->un_dirvp) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); } error = union_copyup(tun, 1, cnp->cn_cred, p); - if (un->un_uppervp == tun->un_dirvp) { - vn_lock(un->un_uppervp, + if (dun->un_uppervp == tun->un_dirvp) { + vn_lock(dun->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); - un->un_flags |= UN_ULOCK; + dun->un_flags |= UN_ULOCK; } VOP_UNLOCK(ap->a_vp, 0, p); } vp = tun->un_uppervp; } - tdvp = un->un_uppervp; + tdvp = dun->un_uppervp; if (tdvp == NULLVP) error = EROFS; - if (error) { - vput(ap->a_tdvp); + if (error) return (error); - } - FIXUP(un, p); - VREF(tdvp); - un->un_flags |= UN_KLOCK; - vput(ap->a_tdvp); + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); - return (VOP_LINK(tdvp, vp, cnp)); + error = VOP_LINK(tdvp, vp, cnp); + + dun->un_flags |= UN_ULOCK; + + return (error); } static int union_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { int error; struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { /* * this should never happen in normal * 
operation but might if there was * a problem creating the top-level shadow * directory. */ error = EXDEV; goto bad; } fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); if (un->un_uppervp == NULLVP) { /* XXX: should do a copyup */ error = EXDEV; goto bad; } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); if (un->un_uppervp == NULLVP) { /* * this should never happen in normal * operation but might if there was * a problem creating the top-level shadow * directory. */ error = EXDEV; goto bad; } tdvp = un->un_uppervp; VREF(tdvp); un->un_flags |= UN_KLOCK; vput(ap->a_tdvp); } if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); tvp = un->un_uppervp; if (tvp != NULLVP) { VREF(tvp); un->un_flags |= UN_KLOCK; } vput(ap->a_tvp); } return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); bad: vrele(fdvp); vrele(fvp); vput(tdvp); if (tvp != NULLVP) vput(tvp); return (error); } static int union_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { - struct union_node *un = VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; + int error; - FIXUP(un, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); if (error) { - vrele(ap->a_dvp); + dun->un_flags |= UN_ULOCK; return (error); } + VOP_UNLOCK(dvp, 0, p); error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); - vrele(ap->a_dvp); if (error) vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); + return (error); } - vput(ap->a_dvp); return (EROFS); } static int union_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error; struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; + int error; if (dun->un_uppervp == NULLVP) panic("union rmdir: null upper vnode"); if (un->un_uppervp != NULLVP) { struct vnode *dvp = dun->un_uppervp; struct vnode *vp = un->un_uppervp; FIXUP(dun, p); - VREF(dvp); dun->un_flags |= UN_KLOCK; - vput(ap->a_dvp); + VOP_UNLOCK(ap->a_dvp, 0, p); FIXUP(un, p); - VREF(vp); un->un_flags |= UN_KLOCK; - vput(ap->a_vp); + VOP_UNLOCK(ap->a_vp, 0, p); - if (union_dowhiteout(un, cnp->cn_cred, cnp->cn_proc)) + if (union_dowhiteout(un, cnp->cn_cred, p)) cnp->cn_flags |= DOWHITEOUT; error = VOP_RMDIR(dvp, vp, ap->a_cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif + dun->un_flags |= UN_ULOCK; + un->un_flags |= UN_ULOCK; } else { FIXUP(dun, p); error = union_mkwhiteout( MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); - vput(ap->a_dvp); - vput(ap->a_vp); } return (error); } static int union_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { - struct union_node *un = 
VTOUNION(ap->a_dvp); - struct vnode *dvp = un->un_uppervp; + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; if (dvp != NULLVP) { - int error; struct vnode *vp; + int error; - FIXUP(un, p); - VREF(dvp); - un->un_flags |= UN_KLOCK; - vput(ap->a_dvp); + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); + dun->un_flags |= UN_ULOCK; *ap->a_vpp = NULLVP; return (error); } - vput(ap->a_dvp); return (EROFS); } /* * union_readdir works in concert with getdirentries and * readdir(3) to provide a list of entries in the unioned * directories. getdirentries is responsible for walking * down the union stack. readdir(3) is responsible for * eliminating duplicate names from the returned data stream. */ static int union_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; u_long *a_cookies; int a_ncookies; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *uvp = un->un_uppervp; struct proc *p = ap->a_uio->uio_procp; if (uvp == NULLVP) return (0); FIXUP(un, p); ap->a_vp = uvp; return (VCALL(uvp, VOFFSET(vop_readdir), ap)); } static int union_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { int error; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_readlink), ap); if (dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { int error; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct vnode *vp = OTHERVP(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_dvp); int islocked = un->un_flags & UN_LOCKED; int dolock = (vp == LOWERVP(ap->a_dvp)); if (islocked) { if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_dvp), p); } ap->a_dvp = vp; error = VCALL(vp, VOFFSET(vop_abortop), ap); if (islocked && dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; struct union_node *un = VTOUNION(vp); struct vnode **vpp; /* * Do nothing (and _don't_ bypass). * Wait to vrele lowervp until reclaim, * so that until then our union_node is in the * cache and reusable. * * NEEDSWORK: Someday, consider inactive'ing * the lowervp and then trying to reactivate it * with capabilities (v_id) * like they do in the name lookup cache code. * That's too much work for now. */ if (un->un_dircache != 0) { for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) vrele(*vpp); free(un->un_dircache, M_TEMP); un->un_dircache = 0; } VOP_UNLOCK(vp, 0, p); if ((un->un_flags & UN_CACHED) == 0) vgone(vp); return (0); } static int union_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { union_freevp(ap->a_vp); return (0); } static int union_lock(ap) struct vop_lock_args *ap; { struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; int flags = ap->a_flags; struct union_node *un; int error; vop_nolock(ap); /* * Need to do real lockmgr-style locking here. 
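 *
 * (What the routine below implements in the meantime is a private
 * sleep lock kept in un_flags; reduced to its core, and leaving out
 * the upper-vnode handling and DIAGNOSTIC checks:
 *
 *	while (un->un_flags & UN_LOCKED) {
 *		un->un_flags |= UN_WANT;
 *		tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0);
 *	}
 *	un->un_flags |= UN_LOCKED;
 *
 * with vn_lock() taken on un_uppervp and UN_ULOCK set when an upper
 * vnode exists.  This is a summary of the code below, not a change.)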
* in the mean time, draining won't work quite right, * which could lead to a few race conditions. * the following test was here, but is not quite right, we * still need to take the lock: if ((flags & LK_TYPE_MASK) == LK_DRAIN) return (0); */ flags &= ~LK_INTERLOCK; start: un = VTOUNION(vp); if (un->un_uppervp != NULLVP) { if (((un->un_flags & UN_ULOCK) == 0) && (vp->v_usecount != 0)) { error = vn_lock(un->un_uppervp, flags, p); if (error) return (error); un->un_flags |= UN_ULOCK; } #ifdef DIAGNOSTIC if (un->un_flags & UN_KLOCK) { vprint("dangling upper lock", vp); panic("union: dangling upper lock"); } #endif } if (un->un_flags & UN_LOCKED) { #ifdef DIAGNOSTIC if (curproc && un->un_pid == curproc->p_pid && un->un_pid > -1 && curproc->p_pid > -1) panic("union: locking against myself"); #endif un->un_flags |= UN_WANT; tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); goto start; } #ifdef DIAGNOSTIC if (curproc) un->un_pid = curproc->p_pid; else un->un_pid = -1; #endif un->un_flags |= UN_LOCKED; return (0); } /* * When operations want to vput() a union node yet retain a lock on * the upper vnode (say, to do some further operations like link(), * mkdir(), ...), they set UN_KLOCK on the union node, then call * vput() which calls VOP_UNLOCK() and comes here. union_unlock() * unlocks the union node (leaving the upper vnode alone), clears the * KLOCK flag, and then returns to vput(). The caller then does whatever * is left to do with the upper vnode, and ensures that it gets unlocked. * * If UN_KLOCK isn't set, then the upper vnode is unlocked here. */ static int union_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; int a_flags; struct proc *a_p; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct proc *p = ap->a_p; #ifdef DIAGNOSTIC if ((un->un_flags & UN_LOCKED) == 0) panic("union: unlock unlocked node"); if (curproc && un->un_pid != curproc->p_pid && curproc->p_pid > -1 && un->un_pid > -1) panic("union: unlocking other process's union node"); #endif un->un_flags &= ~UN_LOCKED; if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) VOP_UNLOCK(un->un_uppervp, 0, p); un->un_flags &= ~(UN_ULOCK|UN_KLOCK); if (un->un_flags & UN_WANT) { un->un_flags &= ~UN_WANT; wakeup((caddr_t) &un->un_flags); } #ifdef DIAGNOSTIC un->un_pid = 0; #endif vop_nounlock(ap); return (0); } static int union_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { int error; struct proc *p = curproc; /* XXX */ struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_bmap), ap); if (dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n", vp, UPPERVP(vp), LOWERVP(vp)); if (UPPERVP(vp) != NULLVP) vprint("union: upper", UPPERVP(vp)); if (LOWERVP(vp) != NULLVP) vprint("union: lower", LOWERVP(vp)); return (0); } static int union_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 
1 : 0); } static int union_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { int error; struct proc *p = curproc; /* XXX */ struct vnode *vp = OTHERVP(ap->a_vp); int dolock = (vp == LOWERVP(ap->a_vp)); if (dolock) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); else FIXUP(VTOUNION(ap->a_vp), p); ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_pathconf), ap); if (dolock) VOP_UNLOCK(vp, 0, p); return (error); } static int union_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_advlock), ap)); } /* * XXX - vop_strategy must be hand coded because it has no * vnode in its arguments. * This goes away with a merged VM/buffer cache. */ static int union_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; int error; struct vnode *savedvp; savedvp = bp->b_vp; bp->b_vp = OTHERVP(bp->b_vp); #ifdef DIAGNOSTIC if (bp->b_vp == NULLVP) panic("union_strategy: nil vp"); if (((bp->b_flags & B_READ) == 0) && (bp->b_vp == LOWERVP(savedvp))) panic("union_strategy: writing to lowervp"); #endif error = VOP_STRATEGY(bp); bp->b_vp = savedvp; return (error); } /* * Global vfs data structures */ vop_t **union_vnodeop_p; static struct vnodeopv_entry_desc union_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) union_abortop }, { &vop_access_desc, (vop_t *) union_access }, { &vop_advlock_desc, (vop_t *) union_advlock }, { &vop_bmap_desc, (vop_t *) union_bmap }, { &vop_close_desc, (vop_t *) union_close }, { &vop_create_desc, (vop_t *) union_create }, { &vop_fsync_desc, (vop_t *) union_fsync }, { &vop_getattr_desc, (vop_t *) union_getattr }, { &vop_inactive_desc, (vop_t *) union_inactive }, { &vop_ioctl_desc, (vop_t *) union_ioctl }, { &vop_islocked_desc, (vop_t *) union_islocked }, { &vop_lease_desc, (vop_t *) union_lease }, { &vop_link_desc, (vop_t *) union_link }, { &vop_lock_desc, (vop_t *) union_lock }, { &vop_lookup_desc, (vop_t *) union_lookup }, { &vop_mkdir_desc, (vop_t *) union_mkdir }, { &vop_mknod_desc, (vop_t *) union_mknod }, { &vop_mmap_desc, (vop_t *) union_mmap }, { &vop_open_desc, (vop_t *) union_open }, { &vop_pathconf_desc, (vop_t *) union_pathconf }, { &vop_poll_desc, (vop_t *) union_poll }, { &vop_print_desc, (vop_t *) union_print }, { &vop_read_desc, (vop_t *) union_read }, { &vop_readdir_desc, (vop_t *) union_readdir }, { &vop_readlink_desc, (vop_t *) union_readlink }, { &vop_reclaim_desc, (vop_t *) union_reclaim }, { &vop_remove_desc, (vop_t *) union_remove }, { &vop_rename_desc, (vop_t *) union_rename }, { &vop_revoke_desc, (vop_t *) union_revoke }, { &vop_rmdir_desc, (vop_t *) union_rmdir }, { &vop_setattr_desc, (vop_t *) union_setattr }, { &vop_strategy_desc, (vop_t *) union_strategy }, { &vop_symlink_desc, (vop_t *) union_symlink }, { &vop_unlock_desc, (vop_t *) union_unlock }, { &vop_whiteout_desc, (vop_t *) union_whiteout }, { &vop_write_desc, (vop_t *) union_write }, { NULL, NULL } }; static struct vnodeopv_desc union_vnodeop_opv_desc = { &union_vnodeop_p, union_vnodeop_entries }; VNODEOP_SET(union_vnodeop_opv_desc); Index: head/sys/msdosfs/msdosfs_vnops.c =================================================================== --- head/sys/msdosfs/msdosfs_vnops.c (revision 35822) +++ head/sys/msdosfs/msdosfs_vnops.c (revision 35823) @@ -1,1987 +1,1973 @@ -/* $Id: msdosfs_vnops.c,v 1.66 
1998/03/20 02:33:42 kato Exp $ */ +/* $Id: msdosfs_vnops.c,v 1.67 1998/03/26 20:52:58 phk Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. 
* * October 1992 */ #include #include #include #include /* defines plimit structure in proc struct */ #include #include #include #include #include #include #include #include /* XXX */ /* defines v_rdev */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for MSDOSFS vnode operations */ static int msdosfs_create __P((struct vop_create_args *)); static int msdosfs_mknod __P((struct vop_mknod_args *)); static int msdosfs_close __P((struct vop_close_args *)); static int msdosfs_access __P((struct vop_access_args *)); static int msdosfs_getattr __P((struct vop_getattr_args *)); static int msdosfs_setattr __P((struct vop_setattr_args *)); static int msdosfs_read __P((struct vop_read_args *)); static int msdosfs_write __P((struct vop_write_args *)); static int msdosfs_fsync __P((struct vop_fsync_args *)); static int msdosfs_remove __P((struct vop_remove_args *)); static int msdosfs_link __P((struct vop_link_args *)); static int msdosfs_rename __P((struct vop_rename_args *)); static int msdosfs_mkdir __P((struct vop_mkdir_args *)); static int msdosfs_rmdir __P((struct vop_rmdir_args *)); static int msdosfs_symlink __P((struct vop_symlink_args *)); static int msdosfs_readdir __P((struct vop_readdir_args *)); static int msdosfs_abortop __P((struct vop_abortop_args *)); static int msdosfs_bmap __P((struct vop_bmap_args *)); static int msdosfs_strategy __P((struct vop_strategy_args *)); static int msdosfs_print __P((struct vop_print_args *)); static int msdosfs_pathconf __P((struct vop_pathconf_args *ap)); static int msdosfs_getpages __P((struct vop_getpages_args *)); static int msdosfs_putpages __P((struct vop_putpages_args *)); /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. Since we * must update directory entries without the benefit of having the vnode * for the directory we must use the vnode for the filesystem. This means * that when a directory is actually read/written (via read, write, or * readdir, or seek) we must use the vnode for the filesystem instead of * the vnode for the directory as would happen in ufs. This is to insure we * retreive the correct block from the buffer cache since the hash value is * based upon the vnode address and the desired block number. */ /* * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. We must also free * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or * only if the SAVESTART bit in cn_flags is clear on success. */ static int msdosfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct denode ndirent; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct timespec ts; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap); #endif /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. 
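 *
 * (The root directory of a FAT volume occupies a fixed area, so when
 * its slots are exhausted the only possible answer is ENOSPC.  The
 * test below expresses that by comparing the slot offset recorded by
 * the preceding lookup against the directory's fixed size, roughly:
 *
 *	if (pdep->de_StartCluster == MSDOSFSROOT &&
 *	    pdep->de_fndoffset >= pdep->de_FileSize)
 *		fail with ENOSPC;
 *
 * a restatement of the check that follows, for orientation only.)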
*/ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad; } /* * Create a directory entry for the file, then call createde() to * have it installed. NOTE: DOS files are always executable. We * use the absence of the owner write bit to make the file * readonly. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_create: no name"); #endif bzero(&ndirent, sizeof(ndirent)); error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = (ap->a_vap->va_mode & VWRITE) ? ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY; ndirent.de_StartCluster = 0; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; ndirent.de_pmp = pdep->de_pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); *ap->a_vpp = DETOV(dep); return (0); bad: zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); return (error); } static int msdosfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { switch (ap->a_vap->va_type) { case VDIR: return (msdosfs_mkdir((struct vop_mkdir_args *)ap)); break; case VREG: return (msdosfs_create((struct vop_create_args *)ap)); break; default: zfree(namei_zone, ap->a_cnp->cn_pnbuf); - vput(ap->a_dvp); return (EINVAL); } /* NOTREACHED */ } static int msdosfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct timespec ts; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); } simple_unlock(&vp->v_interlock); return 0; } static int msdosfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; mode_t mask, file_mode, mode = ap->a_mode; register gid_t *gp; int i; file_mode = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH) | ((dep->de_Attributes & ATTR_READONLY) ? 0 : (S_IWUSR|S_IWGRP|S_IWOTH)); file_mode &= pmp->pm_mask; /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; } } /* User id 0 always gets access. */ if (cred->cr_uid == 0) return 0; mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == pmp->pm_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (pmp->pm_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return (file_mode & mask) == mask ? 
0 : EACCES; } static int msdosfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { u_int cn; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; mode_t mode; struct timespec ts; u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); u_long fileid; getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); vap->va_fsid = dep->de_dev; /* * The following computation of the fileid must be the same as that * used in msdosfs_readdir() to compute d_fileno. If not, pwd * doesn't work. */ if (dep->de_Attributes & ATTR_DIRECTORY) { fileid = cntobn(pmp, dep->de_StartCluster) * dirsperblk; if (dep->de_StartCluster == MSDOSFSROOT) fileid = 1; } else { fileid = cntobn(pmp, dep->de_dirclust) * dirsperblk; if (dep->de_dirclust == MSDOSFSROOT) fileid = roottobn(pmp, 0) * dirsperblk; fileid += dep->de_diroffset / sizeof(struct direntry); } vap->va_fileid = fileid; if ((dep->de_Attributes & ATTR_READONLY) == 0) mode = S_IRWXU|S_IRWXG|S_IRWXO; else mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; vap->va_mode = mode & pmp->pm_mask; vap->va_uid = pmp->pm_uid; vap->va_gid = pmp->pm_gid; vap->va_nlink = 1; vap->va_rdev = 0; vap->va_size = dep->de_FileSize; dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime); if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) { dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime); dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun, &vap->va_ctime); } else { vap->va_atime = vap->va_mtime; vap->va_ctime = vap->va_mtime; } vap->va_flags = 0; if ((dep->de_Attributes & ATTR_ARCHIVE) == 0) vap->va_flags |= SF_ARCHIVED; vap->va_gen = 0; vap->va_blocksize = pmp->pm_bpcluster; vap->va_bytes = (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask; vap->va_type = ap->a_vp->v_type; vap->va_filerev = dep->de_modrev; return (0); } static int msdosfs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; int error = 0; #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n", ap->a_vp, vap, cred, ap->a_p); #endif /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): returning EINVAL\n"); printf(" va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n", vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid); printf(" va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n", vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen); printf(" va_uid %x, va_gid %x\n", vap->va_uid, vap->va_gid); #endif return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser(cred, &ap->a_p->p_acflag))) return (error); /* * We are very inconsistent about handling unsupported * attributes. We ignored the the access time and the * read and execute bits. We were strict for the other * attributes. * * Here we are strict, stricter than ufs in not allowing * users to attempt to set SF_SETTABLE bits or anyone to * set unsupported bits. 
However, we ignore attempts to * set ATTR_ARCHIVE for directories `cp -pr' from a more * sensible file system attempts it a lot. */ if (cred->cr_uid != 0) { if (vap->va_flags & SF_SETTABLE) return EPERM; } if (vap->va_flags & ~SF_ARCHIVED) return EINVAL; if (vap->va_flags & SF_ARCHIVED) dep->de_Attributes &= ~ATTR_ARCHIVE; else if (!(dep->de_Attributes & ATTR_DIRECTORY)) dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { uid_t uid; gid_t gid; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); uid = vap->va_uid; if (uid == (uid_t)VNOVAL) uid = pmp->pm_uid; gid = vap->va_gid; if (gid == (gid_t)VNOVAL) gid = pmp->pm_gid; if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid || (gid != pmp->pm_gid && !groupmember(gid, cred))) && (error = suser(cred, &ap->a_p->p_acflag))) return error; if (uid != pmp->pm_uid || gid != pmp->pm_gid) return EINVAL; } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; } error = detrunc(dep, vap->va_size, 0, cred, ap->a_p); if (error) return error; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser(cred, &ap->a_p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_p)))) return (error); if (vp->v_type != VDIR) { if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 && vap->va_atime.tv_sec != VNOVAL) unix2dostime(&vap->va_atime, &dep->de_ADate, NULL, NULL); if (vap->va_mtime.tv_sec != VNOVAL) unix2dostime(&vap->va_mtime, &dep->de_MDate, &dep->de_MTime, NULL); dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } } /* * DOS files only have the ability to have their writability * attribute set, so we use the owner write bit to set the readonly * attribute. */ if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser(cred, &ap->a_p->p_acflag))) return (error); if (vp->v_type != VDIR) { /* We ignore the read and execute bits. */ if (vap->va_mode & VWRITE) dep->de_Attributes &= ~ATTR_READONLY; else dep->de_Attributes |= ATTR_READONLY; dep->de_flag |= DE_MODIFIED; } } return (deupdat(dep, 1)); } static int msdosfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error = 0; int diff; int blsize; int isadir; long n; long on; daddr_t lbn; daddr_t rablock; int rasize; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; /* * If they didn't ask for any data, then we are done. 
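 *
 * (Each pass of the loop below handles at most one cluster.  A
 * condensed sketch of the per-iteration arithmetic, using the same
 * fields the code itself uses:
 *
 *	lbn = de_cluster(pmp, uio->uio_offset);           which cluster
 *	on  = uio->uio_offset & pmp->pm_crbomask;         offset in it
 *	n   = min(pmp->pm_bpcluster - on, uio->uio_resid);
 *	n   = min(n, dep->de_FileSize - uio->uio_offset); clamp at EOF
 *
 * For directories the cluster is first mapped to a device block with
 * pcbmap() and read through pm_devvp, per the general notes at the
 * top of this file; regular files read through their own vnode.)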
*/ if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); isadir = dep->de_Attributes & ATTR_DIRECTORY; do { lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid); diff = dep->de_FileSize - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; /* convert cluster # to block # if a directory */ if (isadir) { error = pcbmap(dep, lbn, &lbn, 0, &blsize); if (error) return (error); } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); } else { rablock = lbn + 1; if (vp->v_lastr + 1 == lbn && de_cn2off(pmp, rablock) < dep->de_FileSize) { rasize = pmp->pm_bpcluster; error = breadn(vp, lbn, pmp->pm_bpcluster, &rablock, &rasize, 1, NOCRED, &bp); } else error = bread(vp, lbn, pmp->pm_bpcluster, NOCRED, &bp); vp->v_lastr = lbn; } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove(bp->b_data + on, (int) n, uio); if (!isadir) dep->de_flag |= DE_ACCESS; brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); } /* * Write data to a file or directory. */ static int msdosfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int n; int croffset; int resid; u_long osize; int error = 0; u_long count; daddr_t bn, lastcn; struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = ap->a_vp; struct vnode *thisvp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; #ifdef MSDOSFS_DEBUG printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n", vp, uio, ioflag, cred); printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n", dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = dep->de_FileSize; thisvp = vp; break; case VDIR: return EISDIR; default: panic("msdosfs_write(): bad file type"); } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * If they've exceeded their filesize limit, tell them about it. */ if (p && ((uio->uio_offset + uio->uio_resid) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { psignal(p, SIGXFSZ); return (EFBIG); } /* * If the offset we are starting the write at is beyond the end of * the file, then they've done a seek. Unix filesystems allow * files with holes in them, DOS doesn't so we must fill the hole * with zeroed blocks. */ if (uio->uio_offset > dep->de_FileSize) { error = deextend(dep, uio->uio_offset, cred); if (error) return (error); } /* * Remember some values in case the write fails. */ resid = uio->uio_resid; osize = dep->de_FileSize; /* * If we write beyond the end of the file, extend it to its ultimate * size ahead of the time to hopefully get a contiguous area. 
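 *
 * (Condensed, the number of clusters asked of extendfile() is just
 * the difference in cluster counts between the final size and the
 * current size; an illustration of the computation used below:
 *
 *	count = de_clcount(pmp, uio->uio_offset + resid)
 *	      - de_clcount(pmp, osize);
 *	error = extendfile(dep, count, NULL, NULL, 0);
 *
 * and lastcn then bounds the last cluster the write loop may touch.)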
*/ if (uio->uio_offset + resid > osize) { count = de_clcount(pmp, uio->uio_offset + resid) - de_clcount(pmp, osize); error = extendfile(dep, count, NULL, NULL, 0); if (error && (error != ENOSPC || (ioflag & IO_UNIT))) goto errexit; lastcn = dep->de_fc[FC_LASTFC].fc_frcn; } else lastcn = de_clcount(pmp, osize) - 1; do { if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } croffset = uio->uio_offset & pmp->pm_crbomask; n = min(uio->uio_resid, pmp->pm_bpcluster - croffset); if (uio->uio_offset + n > dep->de_FileSize) { dep->de_FileSize = uio->uio_offset + n; /* The object size needs to be set before buffer is allocated */ vnode_pager_setsize(vp, dep->de_FileSize); } bn = de_cluster(pmp, uio->uio_offset); if ((uio->uio_offset & pmp->pm_crbomask) == 0 && (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > de_cluster(pmp, uio->uio_offset) || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { /* * If either the whole cluster gets written, * or we write the cluster from its start beyond EOF, * then no need to read data from disk. */ bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); clrbuf(bp); /* * Do the bmap now, since pcbmap needs buffers * for the fat table. (see msdosfs_strategy) */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; } if (bp->b_blkno == -1) { brelse(bp); if (!error) error = EIO; /* XXX */ break; } } else { /* * The block we need to write into exists, so read it in. */ error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp); if (error) { brelse(bp); break; } } /* * Should these vnode_pager_* functions be done on dir * files? */ /* * Copy the data from user space into the buf header. */ error = uiomove(bp->b_data + croffset, n, uio); /* * If they want this synchronous then write it and wait for * it. Otherwise, if on a cluster boundary write it * asynchronously so we can move on to the next block * without delay. Otherwise do a delayed write because we * may want to write somemore into the block later. */ if (ioflag & IO_SYNC) (void) bwrite(bp); else if (n + croffset == pmp->pm_bpcluster) bawrite(bp); else bdwrite(bp); dep->de_flag |= DE_UPDATE; } while (error == 0 && uio->uio_resid > 0); /* * If the write failed and they want us to, truncate the file back * to the size it was before the write was attempted. */ errexit: if (error) { if (ioflag & IO_UNIT) { detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } else { detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL); if (uio->uio_resid != resid) error = 0; } } else if (ioflag & IO_SYNC) error = deupdat(dep, 1); return (error); } /* * Flush the blocks of a file to disk. * * This function is worthless for vnodes that represent directories. Maybe we * could just do a sync if they try an fsync on a directory file. */ static int msdosfs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; int s; struct buf *bp, *nbp; /* * Flush all dirty buffers associated with a vnode. 
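 *
 * (The scan below restarts from the head of v_dirtyblkhd after every
 * bwrite(), because writing a buffer sleeps and the list may have
 * changed by the time we get back.  In outline, and only as a summary
 * of the code that follows:
 *
 *	loop: for each bp on vp->v_dirtyblkhd
 *		skip it if B_BUSY, otherwise bremfree(), set B_BUSY,
 *		bwrite(bp) and goto loop;
 *	then sleep until vp->v_numoutput drains to zero.)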
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("msdosfs_fsync: not dirty"); bremfree(bp); bp->b_flags |= B_BUSY; splx(s); (void) bwrite(bp); goto loop; } while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("msdosfs_fsync: dirty", vp); goto loop; } #endif splx(s); return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT)); } static int msdosfs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { - int error; struct denode *dep = VTODE(ap->a_vp); struct denode *ddep = VTODE(ap->a_dvp); + int error; if (ap->a_vp->v_type == VDIR) error = EPERM; else error = removede(ddep, dep); #ifdef MSDOSFS_DEBUG printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount); #endif - if (ddep == dep) - vrele(ap->a_vp); - else - vput(ap->a_vp); /* causes msdosfs_inactive() to be called - * via vrele() */ - vput(ap->a_dvp); return (error); } /* * DOS filesystems don't know what links are. But since we already called * msdosfs_lookup() with create and lockparent, the parent is locked so we * have to free it before we return the error. */ static int msdosfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { VOP_ABORTOP(ap->a_tdvp, ap->a_cnp); - vput(ap->a_tdvp); - return EOPNOTSUPP; + return (EOPNOTSUPP); } /* * Renames on files require moving the denode to a new hash queue since the * denode's location is used to compute which hash queue to put the file * in. Unless it is a rename in place. For example "mv a b". * * What follows is the basic algorithm: * * if (file move) { * if (dest file exists) { * remove dest file * } * if (dest and src in same directory) { * rewrite name in existing directory slot * } else { * write new entry in dest directory * update offset and dirclust in denode * move denode to new hash chain * clear old directory entry * } * } else { * directory move * if (dest directory exists) { * if (dest is not empty) { * return ENOTEMPTY * } * remove dest directory * } * if (dest and src in same directory) { * rewrite name in existing entry * } else { * be sure dest is not a child of src directory * write entry in dest directory * update "." and ".." in moved directory * clear old directory entry for moved directory * } * } * * On entry: * source's parent directory is unlocked * source file or directory is unlocked * destination's parent directory is locked * destination file or directory is locked if it exists * * On exit: * all denodes should be released * * Notes: * I'm not sure how the memory containing the pathnames pointed at by the * componentname structures is freed, there may be some memory bleeding * for each rename done. 
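 *
 * To make the file-move case above concrete with a hypothetical
 * example: "mv /dos/a/x /dos/b/x" (no existing b/x) takes the
 * "write new entry in dest directory" branch, so a fresh entry for x
 * is created in b, the old entry in a is marked deleted, the
 * denode's dirclust/diroffset are updated to describe the new slot,
 * and the denode is moved to the hash chain that corresponds to that
 * new location.  A rename within one directory ("mv a/x a/y")
 * instead just rewrites the name, per the same-directory branch
 * above.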
*/ static int msdosfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct denode *ip, *xp, *dp, *zp; u_char toname[11], oldname[11]; u_long from_diroffset, to_diroffset; u_char to_count; int doingdirectory = 0, newparent = 0; int error; u_long cn; daddr_t bn; struct denode *fddep; /* from file's parent directory */ struct denode *fdep; /* from file or directory */ struct denode *tddep; /* to file's parent directory */ struct denode *tdep; /* to file or directory */ struct msdosfsmount *pmp; struct direntry *dotdotp; struct buf *bp; fddep = VTODE(ap->a_fdvp); fdep = VTODE(ap->a_fvp); tddep = VTODE(ap->a_tdvp); tdep = tvp ? VTODE(tvp) : NULL; pmp = fddep->de_pmp; pmp = VFSTOMSDOSFS(fdvp->v_mount); #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("msdosfs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); return (error); } /* * If source and dest are the same, do nothing. */ if (tvp == fvp) { error = 0; goto abortit; } error = vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); if (error) goto abortit; dp = VTODE(fdvp); ip = VTODE(fvp); /* * Be sure we are not renaming ".", "..", or an alias of ".". This * leads to a crippled directory tree. It's pretty tough to do a * "ls" or "pwd" with the "." directory entry missing, and "cd .." * doesn't work if the ".." entry is missing. */ if (ip->de_Attributes & ATTR_DIRECTORY) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->de_flag & DE_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->de_flag |= DE_RENAME; doingdirectory++; } /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; /* * Remember direntry place to use for destination */ to_diroffset = dp->de_fndoffset; to_count = dp->de_fndcnt; /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to doscheckpath(). 
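 *
 * A hypothetical example of the case being guarded against:
 * "mv /dos/a /dos/a/b/c" must fail, since a would become a child of
 * its own descendant and the whole subtree would be orphaned.
 * Conceptually, doscheckpath(ip, dp) prevents this by walking the
 * ".." chain upward from the target parent dp; roughly:
 *
 *	for (dep = dp; dep is not the root; dep = parent via "..")
 *		if (dep == ip)
 *			reject the rename;
 *	otherwise the rename may proceed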
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster) newparent = 1; vrele(fdvp); if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); /* * doscheckpath() vput()'s dp, * so we have to do a relookup afterwards */ error = doscheckpath(ip, dp); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost to startdir"); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; } if (xp != NULL) { /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (xp->de_Attributes & ATTR_DIRECTORY) { if (!dosdirempty(xp)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = removede(dp, xp); if (error) goto bad; vput(tvp); xp = NULL; } /* * Convert the filename in tcnp into a dos filename. We copy this * into the denode and directory entry for the destination * file/directory. */ error = uniqdosname(VTODE(tdvp), tcnp, toname); if (error) goto abortit; /* * Since from wasn't locked at various places above, * have to do a relookup here. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost from startdir"); if (!newparent) VOP_UNLOCK(tdvp, 0, p); (void) relookup(fdvp, &fvp, fcnp); if (fvp == NULL) { /* * From name has disappeared. */ if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); if (newparent) VOP_UNLOCK(tdvp, 0, p); vrele(tdvp); return 0; } xp = VTODE(fvp); zp = VTODE(fdvp); from_diroffset = zp->de_fndoffset; /* * Ensure that the directory entry still exists and has not * changed till now. If the source is a file the entry may * have been unlinked or renamed. In either case there is * no further work to be done. If the source is a directory * then it cannot have been rmdir'ed or renamed; this is * prohibited by the DE_RENAME flag. */ if (xp != ip) { if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); VOP_UNLOCK(fvp, 0, p); if (newparent) VOP_UNLOCK(fdvp, 0, p); xp = NULL; } else { vrele(fvp); xp = NULL; /* * First write a new entry in the destination * directory and mark the entry in the source directory * as deleted. Then move the denode to the correct hash * chain for its new location in the filesystem. And, if * we moved a directory, then update its .. entry to point * to the new parent directory. 
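 *
 * In outline, the steps below happen in this order:
 *
 *	1. createde()  - write the entry under the destination name
 *	2. removede()  - mark the old source-directory entry deleted
 *	3. pcbmap()    - recompute de_dirclust/de_diroffset (files only)
 *	4. reinsert()  - rehash the denode at its new location
 *
 * Writing the new entry before deleting the old one means an
 * ill-timed crash is more likely to leave a stray duplicate entry
 * than to lose the file altogether.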
*/ bcopy(ip->de_Name, oldname, 11); bcopy(toname, ip->de_Name, 11); /* update denode */ dp->de_fndoffset = to_diroffset; dp->de_fndcnt = to_count; error = createde(ip, dp, (struct denode **)0, tcnp); if (error) { bcopy(oldname, ip->de_Name, 11); if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } ip->de_refcnt++; zp->de_fndoffset = from_diroffset; error = removede(zp, ip); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (!doingdirectory) { error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0, &ip->de_dirclust, 0); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (ip->de_dirclust != MSDOSFSROOT) ip->de_diroffset = to_diroffset & pmp->pm_crbomask; } reinsert(ip); if (newparent) VOP_UNLOCK(fdvp, 0, p); } /* * If we moved a directory to a new parent directory, then we must * fixup the ".." entry in the moved directory. */ if (doingdirectory && newparent) { cn = ip->de_StartCluster; if (cn == MSDOSFSROOT) { /* this should never happen */ panic("msdosfs_rename(): updating .. in root directory?"); } else bn = cntobn(pmp, cn); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, NOCRED, &bp); if (error) { /* XXX should really panic here, fs is corrupt */ brelse(bp); VOP_UNLOCK(fvp, 0, p); goto bad; } dotdotp = (struct direntry *)bp->b_data + 1; putushort(dotdotp->deStartCluster, dp->de_StartCluster); if (FAT32(pmp)) putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16); error = bwrite(bp); if (error) { /* XXX should really panic here, fs is corrupt */ VOP_UNLOCK(fvp, 0, p); goto bad; } } VOP_UNLOCK(fvp, 0, p); bad: if (xp) vput(tvp); vput(tdvp); out: ip->de_flag &= ~DE_RENAME; vrele(fdvp); vrele(fvp); return (error); } static struct { struct direntry dot; struct direntry dotdot; } dosdirtemplate = { { ". ", " ", /* the . entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ }, { ".. ", " ", /* the .. entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ } }; static int msdosfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struvt vnode **a_vpp; struvt componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; - struct denode ndirent; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); - int error; - int bn; - u_long newcluster, pcl; struct direntry *denp; struct msdosfsmount *pmp = pdep->de_pmp; struct buf *bp; + u_long newcluster, pcl; + int bn; + int error; + struct denode ndirent; struct timespec ts; /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad2; } /* * Allocate a cluster to hold the about to be created directory. 
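 *
 * On FAT32 a starting cluster is a 32-bit quantity, but the on-disk
 * direntry only has a 16-bit deStartCluster field, so the upper half
 * is carried in deHighClust.  For example, a hypothetical cluster
 * number 0x00012345 is stored as deStartCluster = 0x2345 with
 * deHighClust = 0x0001, which is exactly what the putushort() pairs
 * below do for the "." and ".." entries.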
*/ error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL); if (error) goto bad2; bzero(&ndirent, sizeof(ndirent)); ndirent.de_pmp = pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); /* * Now fill the cluster with the "." and ".." entries. And write * the cluster to disk. This way it is there for the parent * directory to be pointing at if there were a crash. */ bn = cntobn(pmp, newcluster); /* always succeeds */ bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0); bzero(bp->b_data, pmp->pm_bpcluster); bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate); denp = (struct direntry *)bp->b_data; putushort(denp[0].deStartCluster, newcluster); putushort(denp[0].deCDate, ndirent.de_CDate); putushort(denp[0].deCTime, ndirent.de_CTime); denp[0].deCHundredth = ndirent.de_CHun; putushort(denp[0].deADate, ndirent.de_ADate); putushort(denp[0].deMDate, ndirent.de_MDate); putushort(denp[0].deMTime, ndirent.de_MTime); pcl = pdep->de_StartCluster; if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) pcl = 0; putushort(denp[1].deStartCluster, pcl); putushort(denp[1].deCDate, ndirent.de_CDate); putushort(denp[1].deCTime, ndirent.de_CTime); denp[1].deCHundredth = ndirent.de_CHun; putushort(denp[1].deADate, ndirent.de_ADate); putushort(denp[1].deMDate, ndirent.de_MDate); putushort(denp[1].deMTime, ndirent.de_MTime); if (FAT32(pmp)) { putushort(denp[0].deHighClust, newcluster >> 16); putushort(denp[1].deHighClust, pdep->de_StartCluster >> 16); } error = bwrite(bp); if (error) goto bad; /* * Now build up a directory entry pointing to the newly allocated * cluster. This will be written to an empty slot in the parent * directory. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_mkdir: no name"); #endif error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = ATTR_DIRECTORY; ndirent.de_StartCluster = newcluster; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); *ap->a_vpp = DETOV(dep); return (0); bad: clusterfree(pmp, newcluster, NULL); bad2: zfree(namei_zone, cnp->cn_pnbuf); - vput(ap->a_dvp); return (error); } static int msdosfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct denode *ip, *dp; + struct proc *p = cnp->cn_proc; int error; ip = VTODE(vp); dp = VTODE(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) { error = ENOTEMPTY; goto out; } /* * Delete the entry from the directory. For dos filesystems this * gets rid of the directory entry on disk, the in memory copy * still exists but the de_refcnt is <= 0. This prevents it from * being found by deget(). When the vput() on dep is done we give * up access and eventually msdosfs_reclaim() will be called which * will remove it from the denode cache. */ error = removede(dp, ip); if (error) goto out; /* * This is where we decrement the link count in the parent * directory. 
Since dos filesystems don't do this we just purge - * the name cache and let go of the parent directory denode. + * the name cache. */ cache_purge(dvp); - vput(dvp); - dvp = NULL; + VOP_UNLOCK(dvp, 0, p); /* * Truncate the directory that is being deleted. */ - error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, cnp->cn_proc); + error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, p); cache_purge(vp); + + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); out: - if (dvp) - vput(dvp); - vput(vp); return (error); } /* * DOS filesystems don't know what symlinks are. */ static int msdosfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { zfree(namei_zone, ap->a_cnp->cn_pnbuf); /* VOP_ABORTOP(ap->a_dvp, ap->a_cnp); ??? */ - vput(ap->a_dvp); return (EOPNOTSUPP); } static int msdosfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { int error = 0; int diff; long n; int blsize; long on; u_long cn; u_long fileno; u_long dirsperblk; long bias = 0; daddr_t bn, lbn; struct buf *bp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct direntry *dentp; struct dirent dirbuf; struct uio *uio = ap->a_uio; u_long *cookies = NULL; int ncookies = 0; off_t offset, off; int chksum = -1; #ifdef MSDOSFS_DEBUG printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the the filesystem vnode, and hence can * retrieve the wrong block from the buffer cache for a plain file. * So, fail attempts to readdir() on a plain file. */ if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) return (ENOTDIR); /* * To be safe, initialize dirbuf */ bzero(dirbuf.d_name, sizeof(dirbuf.d_name)); /* * If the user buffer is smaller than the size of one dos directory * entry or the file offset is not a multiple of the size of a * directory entry, then we fail the read. */ off = offset = uio->uio_offset; if (uio->uio_resid < sizeof(struct direntry) || (offset & (sizeof(struct direntry) - 1))) return (EINVAL); if (ap->a_ncookies) { ncookies = uio->uio_resid / 16; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); *ap->a_cookies = cookies; *ap->a_ncookies = ncookies; } dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); /* * If they are reading from the root directory then, we simulate * the . and .. entries since these don't exist in the root * directory. We also set the offset bias to make up for having to * simulate these entries. By this I mean that at file offset 64 we * read the first entry in the root directory that lives on disk. */ if (dep->de_StartCluster == MSDOSFSROOT || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) { #if 0 printf("msdosfs_readdir(): going after . or .. 
in root dir, offset %d\n", offset); #endif bias = 2 * sizeof(struct direntry); if (offset < bias) { for (n = (int)offset / sizeof(struct direntry); n < 2; n++) { if (FAT32(pmp)) dirbuf.d_fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else dirbuf.d_fileno = 1; dirbuf.d_type = DT_DIR; switch (n) { case 0: dirbuf.d_namlen = 1; strcpy(dirbuf.d_name, "."); break; case 1: dirbuf.d_namlen = 2; strcpy(dirbuf.d_name, ".."); break; } dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) goto out; error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) goto out; offset += sizeof(struct direntry); off = offset; if (cookies) { *cookies++ = offset; if (--ncookies <= 0) goto out; } } } } off = offset; while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, blsize - bp->b_resid); /* * Convert from dos directory entries to fs-independent * directory entries. */ for (dentp = (struct direntry *)(bp->b_data + on); (char *)dentp < bp->b_data + on + n; dentp++, offset += sizeof(struct direntry)) { #if 0 printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n", dentp, prev, crnt, dentp->deName[0], dentp->deAttributes); #endif /* * If this is an unused entry, we can stop. */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); goto out; } /* * Skip deleted entries. */ if (dentp->deName[0] == SLOT_DELETED) { chksum = -1; continue; } /* * Handle Win95 long directory entries */ if (dentp->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = win2unixfn((struct winentry *)dentp, &dirbuf, chksum, pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2w); continue; } /* * Skip volume labels */ if (dentp->deAttributes & ATTR_VOLUME) { chksum = -1; continue; } /* * This computation of d_fileno must match * the computation of va_fileid in * msdosfs_getattr. 
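 *
 * For a subdirectory the pseudo-inode number is derived from its
 * starting cluster; e.g. a hypothetical directory starting at
 * cluster 7 is reported as d_fileno = cntobn(pmp, 7) * dirsperblk,
 * roughly the index of the first directory slot in that cluster
 * counted from the start of the volume.  The root directory is
 * special-cased because its start cluster field reads as 0
 * (MSDOSFSROOT): it is reported as fileno 1 on FAT12/16 and, on
 * FAT32, as the slot number computed from pm_rootdirblk.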
*/ if (dentp->deAttributes & ATTR_DIRECTORY) { fileno = getushort(dentp->deStartCluster); if (FAT32(pmp)) fileno |= getushort(dentp->deHighClust) << 16; /* if this is the root directory */ if (fileno == MSDOSFSROOT) if (FAT32(pmp)) fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else fileno = 1; else fileno = cntobn(pmp, fileno) * dirsperblk; dirbuf.d_fileno = fileno; dirbuf.d_type = DT_DIR; } else { dirbuf.d_fileno = offset / sizeof(struct direntry); dirbuf.d_type = DT_REG; } if (chksum != winChksum(dentp->deName)) dirbuf.d_namlen = dos2unixfn(dentp->deName, (u_char *)dirbuf.d_name, pmp->pm_flags & MSDOSFSMNT_SHORTNAME, pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_d2u, pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_ul); else dirbuf.d_name[dirbuf.d_namlen] = 0; chksum = -1; dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) { brelse(bp); goto out; } error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) { brelse(bp); goto out; } if (cookies) { *cookies++ = offset + sizeof(struct direntry); if (--ncookies <= 0) { brelse(bp); goto out; } } off = offset + sizeof(struct direntry); } brelse(bp); } out: /* Subtract unused cookies */ if (ap->a_ncookies) *ap->a_ncookies -= ncookies; uio->uio_offset = off; /* * Set the eofflag (NFS uses it) */ if (ap->a_eofflag) if (dep->de_FileSize - (offset - bias) <= 0) *ap->a_eofflag = 1; else *ap->a_eofflag = 0; return (error); } static int msdosfs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * vp - address of vnode file the file * bn - which cluster we are interested in mapping to a filesystem block number. * vpp - returns the vnode for the block special file holding the filesystem * containing the file of interest * bnp - address of where to return the filesystem relative block number */ static int msdosfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; if (ap->a_bnp == NULL) return (0); if (ap->a_runp) { /* * Sequential clusters should be counted here. */ *ap->a_runp = 0; } if (ap->a_runb) { *ap->a_runb = 0; } return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); } static int msdosfs_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct denode *dep = VTODE(bp->b_vp); struct vnode *vp; int error = 0; if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR) panic("msdosfs_strategy: spec"); /* * If we don't already know the filesystem relative block number * then get it using pcbmap(). If pcbmap() returns the block * number as -1 then we've got a hole in the file. DOS filesystems * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if (bp->b_blkno == -1) { biodone(bp); return (0); } /* * Read/write the block from/to the disk that contains the desired * file block. 
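 *
 * By this point b_blkno has been translated (by the pcbmap() call
 * above) from a file-relative cluster to a filesystem-relative block,
 * so all that is left is to re-aim the buf at the underlying device:
 * point b_dev at the block device and hand the request to that
 * vnode's own strategy routine via the VOCALL() below.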
*/ vp = dep->de_devvp; bp->b_dev = vp->v_rdev; VOCALL(vp->v_op, VOFFSET(vop_strategy), ap); return (0); } static int msdosfs_print(ap) struct vop_print_args /* { struct vnode *vp; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); printf( "tag VT_MSDOSFS, startcluster %d, dircluster %ld, diroffset %ld ", dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset); printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev)); lockmgr_printinfo(&dep->de_lock); printf("\n"); return (0); } static int msdosfs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } /* Global vfs data structures for msdosfs */ vop_t **msdosfs_vnodeop_p; static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) msdosfs_abortop }, { &vop_access_desc, (vop_t *) msdosfs_access }, { &vop_bmap_desc, (vop_t *) msdosfs_bmap }, { &vop_cachedlookup_desc, (vop_t *) msdosfs_lookup }, { &vop_close_desc, (vop_t *) msdosfs_close }, { &vop_create_desc, (vop_t *) msdosfs_create }, { &vop_fsync_desc, (vop_t *) msdosfs_fsync }, { &vop_getattr_desc, (vop_t *) msdosfs_getattr }, { &vop_inactive_desc, (vop_t *) msdosfs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_link_desc, (vop_t *) msdosfs_link }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_mkdir_desc, (vop_t *) msdosfs_mkdir }, { &vop_mknod_desc, (vop_t *) msdosfs_mknod }, { &vop_pathconf_desc, (vop_t *) msdosfs_pathconf }, { &vop_print_desc, (vop_t *) msdosfs_print }, { &vop_read_desc, (vop_t *) msdosfs_read }, { &vop_readdir_desc, (vop_t *) msdosfs_readdir }, { &vop_reclaim_desc, (vop_t *) msdosfs_reclaim }, { &vop_remove_desc, (vop_t *) msdosfs_remove }, { &vop_rename_desc, (vop_t *) msdosfs_rename }, { &vop_rmdir_desc, (vop_t *) msdosfs_rmdir }, { &vop_setattr_desc, (vop_t *) msdosfs_setattr }, { &vop_strategy_desc, (vop_t *) msdosfs_strategy }, { &vop_symlink_desc, (vop_t *) msdosfs_symlink }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_write_desc, (vop_t *) msdosfs_write }, { &vop_getpages_desc, (vop_t *) msdosfs_getpages }, { &vop_putpages_desc, (vop_t *) msdosfs_putpages }, { NULL, NULL } }; static struct vnodeopv_desc msdosfs_vnodeop_opv_desc = { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; VNODEOP_SET(msdosfs_vnodeop_opv_desc); Index: head/sys/netinet/mlf_ipl.c =================================================================== --- head/sys/netinet/mlf_ipl.c (revision 35822) +++ head/sys/netinet/mlf_ipl.c 
(revision 35823) @@ -1,383 +1,390 @@ /* * Copyright (C) 1993-1997 by Darren Reed. * * Redistribution and use in source and binary forms are permitted * provided that this notice is preserved and due credit is given * to the original author and the contributors. */ /* * 29/12/94 Added code from Marc Huber to allow it to allocate * its own major char number! Way cool patch! */ #include #if defined(__FreeBSD__) && (__FreeBSD__ > 1) # ifdef IPFILTER_LKM # include # define ACTUALLY_LKM_NOT_KERNEL # else # define __FreeBSD_version 300000 /* this will do as a hack */ # endif #endif #include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) # include "opt_devfs.h" # include # include # ifdef DEVFS # include # endif /*DEVFS*/ #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #if BSD >= 199506 # include #endif #if (__FreeBSD_version >= 199511) #include #include #include #include #include #include #include #include #endif #if (__FreeBSD__ > 1) # include #endif #include #include "netinet/ipl.h" #include "netinet/ip_compat.h" #include "netinet/ip_fil.h" #include "netinet/ip_state.h" #include "netinet/ip_nat.h" #include "netinet/ip_auth.h" #include "netinet/ip_frag.h" #if !defined(VOP_LEASE) && defined(LEASE_CHECK) #define VOP_LEASE LEASE_CHECK #endif #ifndef MIN #define MIN(a,b) (((a)<(b))?(a):(b)) #endif extern int lkmenodev __P((void)); static char *ipf_devfiles[] = { IPL_NAME, IPL_NAT, IPL_STATE, IPL_AUTH, NULL }; static int if_ipl_unload __P((struct lkm_table *, int)); static int if_ipl_load __P((struct lkm_table *, int)); static int if_ipl_remove __P((void)); int xxxinit __P((struct lkm_table *, int, int)); struct cdevsw ipldevsw = { iplopen, /* open */ iplclose, /* close */ iplread, /* read */ (void *)nullop, /* write */ iplioctl, /* ioctl */ (void *)nullop, /* stop */ (void *)nullop, /* reset */ (void *)NULL, /* tty */ (void *)nullop, /* select */ (void *)nullop, /* mmap */ NULL /* strategy */ }; #ifdef SYSCTL_INT SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &fr_flags, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_pass, CTLFLAG_RW, &fr_pass, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &fr_active, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RW, &fr_tcpidletimeout, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RW, &fr_tcpclosewait, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RW, &fr_tcplastack, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RW, &fr_tcptimeout, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RW, &fr_tcpclosed, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RW, &fr_udptimeout, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RW, &fr_icmptimeout, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_defnatage, CTLFLAG_RW, &fr_defnatage, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_ipfrttl, CTLFLAG_RW, &fr_ipfrttl, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, ipl_unreach, CTLFLAG_RW, &ipl_unreach, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, ipl_inited, CTLFLAG_RD, &ipl_inited, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_authsize, CTLFLAG_RD, &fr_authsize, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_authused, CTLFLAG_RD, &fr_authused, 0, ""); SYSCTL_INT(_net_inet_ipf, OID_AUTO, fr_defaultauthage, CTLFLAG_RW, &fr_defaultauthage, 0, ""); #endif #ifdef DEVFS 
void *ipf_devfs[IPL_LOGMAX + 1]; #endif #if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) int ipl_major = 0; MOD_DEV(IPL_VERSION, LM_DT_CHAR, -1, &ipldevsw); extern struct cdevsw cdevsw[]; extern int vd_unuseddev __P((void)); extern int nchrdev; #else int ipl_major = CDEV_MAJOR; static struct cdevsw ipl_cdevsw = { iplopen, iplclose, iplread, nowrite, /* 79 */ iplioctl, nostop, noreset, nodevtotty, seltrue, nommap, nostrategy, "ipl", NULL, -1 }; #endif static void ipl_drvinit __P((void *)); #ifdef ACTUALLY_LKM_NOT_KERNEL static int iplaction __P((struct lkm_table *, int)); static int iplaction(lkmtp, cmd) struct lkm_table *lkmtp; int cmd; { #if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) int i = ipl_major; struct lkm_dev *args = lkmtp->private.lkm_dev; #endif int err = 0; switch (cmd) { case LKM_E_LOAD : if (lkmexists(lkmtp)) return EEXIST; #if !defined(__FreeBSD_version) || (__FreeBSD_version < 220000) for (i = 0; i < nchrdev; i++) if (cdevsw[i].d_open == lkmenodev || cdevsw[i].d_open == iplopen) break; if (i == nchrdev) { printf("IP Filter: No free cdevsw slots\n"); return ENODEV; } ipl_major = i; args->lkm_offset = i; /* slot in cdevsw[] */ #endif printf("IP Filter: loaded into slot %d\n", ipl_major); err = if_ipl_load(lkmtp, cmd); if (!err) ipl_drvinit((void *)NULL); return err; break; case LKM_E_UNLOAD : err = if_ipl_unload(lkmtp, cmd); if (!err) { printf("IP Filter: unloaded from slot %d\n", ipl_major); # ifdef DEVFS if (ipf_devfs[IPL_LOGIPF]) devfs_remove_dev(ipf_devfs[IPL_LOGIPF]); if (ipf_devfs[IPL_LOGNAT]) devfs_remove_dev(ipf_devfs[IPL_LOGNAT]); if (ipf_devfs[IPL_LOGSTATE]) devfs_remove_dev(ipf_devfs[IPL_LOGSTATE]); if (ipf_devfs[IPL_LOGAUTH]) devfs_remove_dev(ipf_devfs[IPL_LOGAUTH]); # endif } return err; case LKM_E_STAT : break; default: err = EIO; break; } return 0; } static int if_ipl_remove __P((void)) { char *name; struct nameidata nd; int error, i; for (i = 0; (name = ipf_devfiles[i]); i++) { NDINIT(&nd, DELETE, LOCKPARENT, UIO_SYSSPACE, name, curproc); if ((error = namei(&nd))) return (error); VOP_LEASE(nd.ni_vp, curproc, curproc->p_ucred, LEASE_WRITE); vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY, curproc); VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); (void) VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp != NULLVP) + vput(nd.ni_vp); } return 0; } static int if_ipl_unload(lkmtp, cmd) struct lkm_table *lkmtp; int cmd; { int error = 0; error = ipldetach(); if (!error) error = if_ipl_remove(); return error; } static int if_ipl_load(lkmtp, cmd) struct lkm_table *lkmtp; int cmd; { struct nameidata nd; struct vattr vattr; int error = 0, fmode = S_IFCHR|0600, i; char *name; error = iplattach(); if (error) return error; (void) if_ipl_remove(); for (i = 0; (name = ipf_devfiles[i]); i++) { NDINIT(&nd, CREATE, LOCKPARENT, UIO_SYSSPACE, name, curproc); if ((error = namei(&nd))) return error; if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VCHR; vattr.va_mode = (fmode & 07777); vattr.va_rdev = (ipl_major << 8) | i; VOP_LEASE(nd.ni_dvp, curproc, curproc->p_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + vput(nd.ni_dvp); if (error) return error; } return 0; } #endif /* actually LKM */ #if defined(__FreeBSD_version) && (__FreeBSD_version < 220000) /* * strlen isn't present 
in 2.1.* kernels. */ size_t strlen(string) char *string; { register char *s; for (s = string; *s; s++) ; return (size_t)(s - string); } int xxxinit(lkmtp, cmd, ver) struct lkm_table *lkmtp; int cmd, ver; { DISPATCH(lkmtp, cmd, ver, iplaction, iplaction, iplaction); } #else # ifdef IPFILTER_LKM # include MOD_DECL(if_ipl); static struct lkm_dev _module = { LM_DEV, LKM_VERSION, IPL_VERSION, CDEV_MAJOR, LM_DT_CHAR, { (void *)&ipl_cdevsw } }; int if_ipl __P((struct lkm_table *, int, int)); int if_ipl(lkmtp, cmd, ver) struct lkm_table *lkmtp; int cmd, ver; { DISPATCH(lkmtp, cmd, ver, iplaction, iplaction, iplaction); } # endif static ipl_devsw_installed = 0; static void ipl_drvinit __P((void *unused)) { dev_t dev; # ifdef DEVFS void **tp = ipf_devfs; # endif if (!ipl_devsw_installed ) { dev = makedev(CDEV_MAJOR, 0); cdevsw_add(&dev, &ipl_cdevsw, NULL); ipl_devsw_installed = 1; # ifdef DEVFS tp[IPL_LOGIPF] = devfs_add_devswf(&ipl_cdevsw, IPL_LOGIPF, DV_CHR, 0, 0, 0600, "ipf"); tp[IPL_LOGNAT] = devfs_add_devswf(&ipl_cdevsw, IPL_LOGNAT, DV_CHR, 0, 0, 0600, "ipnat"); tp[IPL_LOGSTATE] = devfs_add_devswf(&ipl_cdevsw, IPL_LOGSTATE, DV_CHR, 0, 0, 0600, "ipstate"); tp[IPL_LOGAUTH] = devfs_add_devswf(&ipl_cdevsw, IPL_LOGAUTH, DV_CHR, 0, 0, 0600, "ipauth"); # endif } } SYSINIT(ipldev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,ipl_drvinit,NULL); #endif /* _FreeBSD_version */ Index: head/sys/nfs/nfs_serv.c =================================================================== --- head/sys/nfs/nfs_serv.c (revision 35822) +++ head/sys/nfs/nfs_serv.c (revision 35823) @@ -1,3462 +1,3473 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_serv.c,v 1.58 1998/02/09 06:10:35 eivind Exp $ + * $Id: nfs_serv.c,v 1.59 1998/03/30 09:53:56 phk Exp $ */ /* * nfs version 2 and 3 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) * 3 - build the rpc reply in an mbuf list * nb: * - do not mix the phases, since the nfsm_?? macros can return failures * on a bad rpc or similar and do not do any vrele() or vput()'s * * - the nfsm_reply() macro generates an nfs rpc reply with the nfs * error number iff error != 0 whereas * returning an error from the server function implies a fatal error * such as a badly constructed rpc request that should be dropped without * a reply. * For Version 3, nfsm_reply() does not return for the error case, since * most version 3 rpcs return more than the status for error cases. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; #ifndef NFS_NOSERVER nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; /* Global vars */ extern u_long nfs_xdrneg1; extern u_long nfs_false, nfs_true; extern enum vtype nv3tov_type[8]; extern struct nfsstats nfsstats; int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000; int nfsrvw_procrastinate_v3 = 0; static int nfs_async; SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, struct proc *)); static void nfsrvw_coalesce __P((struct nfsrv_descript *, struct nfsrv_descript *)); /* * nfs v3 access service */ int nfsrv3_access(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret; char *cp2; struct mbuf *mb, *mreq, *mb2; struct vattr vattr, *vap = &vattr; u_long testmode, nfsmode; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } nfsmode = fxdr_unsigned(u_long, *tl); if ((nfsmode & NFSV3ACCESS_READ) && nfsrv_access(vp, VREAD, cred, rdonly, procp)) nfsmode &= ~NFSV3ACCESS_READ; if (vp->v_type == VDIR) testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); else testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if ((nfsmode & testmode) && nfsrv_access(vp, VWRITE, cred, rdonly, procp)) nfsmode &= ~testmode; if (vp->v_type == VDIR) testmode = NFSV3ACCESS_LOOKUP; else testmode = NFSV3ACCESS_EXECUTE; if ((nfsmode & testmode) && nfsrv_access(vp, VEXEC, cred, rdonly, procp)) nfsmode &= ~testmode; getret = VOP_GETATTR(vp, vap, cred, procp); vput(vp); nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, vap); 
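	/*
	 * The v3 ACCESS reply is just the post-op attributes (emitted
	 * above) followed by the client's requested mask, built below,
	 * with every bit cleared whose corresponding nfsrv_access()
	 * check failed; e.g. a request for READ|MODIFY against a
	 * read-only export should come back with only NFSV3ACCESS_READ
	 * still set.
	 */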
nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); nfsm_srvdone; } /* * nfs getattr service */ int nfsrv_getattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va; register struct vattr *vap = &va; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(0); return (0); } nqsrv_getl(vp, ND_READ); error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); if (error) return (0); nfsm_build(fp, struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); nfsm_srvfillattr(vap, fp); nfsm_srvdone; } /* * nfs setattr service */ int nfsrv_setattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, preat; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register struct nfs_fattr *fp; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; struct timespec guard; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Nah nah nah nah na nah * There is a bug in the Sun client that puts 0xffff in the mode * field of sattr when it should put in 0xffffffff. The u_short * doesn't sign extend. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) vap->va_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != nfs_xdrneg1) vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != nfs_xdrneg1) vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != nfs_xdrneg1) vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); #else vap->va_atime.tv_sec = fxdr_unsigned(long, sp->sa_atime.nfsv2_sec); vap->va_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); } /* * Now that we have all the fields, lets do it. 
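 *
 * (The v2 mode test above works because a Sun client that means
 * "don't change the mode" sends 0xffff where it should send
 * 0xffffffff; after fxdr_unsigned() the low 16 bits are all ones in
 * either encoding, so (mode & 0xffff) == 0xffff identifies "unset"
 * for both, while a real mode such as 0755 fails the test and is
 * converted with nfstov_mode().)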
*/ if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } nqsrv_getl(vp, ND_WRITE); if (v3) { error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); if (!error && gcheck && (preat.va_ctime.tv_sec != guard.tv_sec || preat.va_ctime.tv_nsec != guard.tv_nsec)) error = NFSERR_NOT_SYNC; if (error) { vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } } /* * If the size is being changed write acces is required, otherwise * just check for a read only file system. */ if (vap->va_size == ((u_quad_t)((quad_t) -1))) { if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto out; } } else { if (vp->v_type == VDIR) { error = EISDIR; goto out; } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly, procp)) goto out; } error = VOP_SETATTR(vp, vap, cred, procp); postat_ret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = postat_ret; out: vput(vp); nfsm_reply(NFSX_WCCORFATTR(v3)); if (v3) { nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs lookup rpc */ int nfsrv_lookup(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct nameidata nd, ind, *ndp = &nd; struct vnode *vp, *dirp; nfsfh_t nfh; fhandle_t *fhp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirattr_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vattr va, dirattr, *vap = &va; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); pubflag = nfs_ispublicfh(fhp); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), pubflag); if (!error && pubflag) { if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) { /* * Setup call to lookup() to see if we can find * the index file. Arguably, this doesn't belong * in a kernel.. Ugh. */ ind = nd; VOP_UNLOCK(nd.ni_vp, 0, procp); ind.ni_pathlen = strlen(nfs_pub.np_index); ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf = nfs_pub.np_index; ind.ni_startdir = nd.ni_vp; VREF(ind.ni_startdir); error = lookup(&ind); if (!error) { /* * Found an index file. Get rid of * the old references. */ if (dirp) vrele(dirp); dirp = nd.ni_vp; vrele(nd.ni_startdir); ndp = &ind; } else error = 0; } /* * If the public filehandle was used, check that this lookup * didn't result in a filehandle outside the publicly exported * filesystem. 
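 *
 * In other words, a WebNFS-style lookup through the public file
 * handle may walk a multi-component path; if that walk crosses a
 * mount point into some filesystem other than nfs_pub.np_mount, the
 * result is refused with EPERM below rather than handing the client
 * a handle for a filesystem that was never exported to it.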
*/ if (!error && ndp->ni_vp->v_mount != nfs_pub.np_mount) { vput(nd.ni_vp); error = EPERM; } } if (dirp) { if (v3) dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, procp); vrele(dirp); } if (error) { nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nqsrv_getl(ndp->ni_startdir, ND_READ); vrele(ndp->ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); vp = ndp->ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); if (error) { nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nfsm_srvfhtom(fhp, v3); if (v3) { nfsm_srvpostop_attr(0, vap); nfsm_srvpostop_attr(dirattr_ret, &dirattr); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs readlink service */ int nfsrv_readlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; register struct iovec *ivp = iv; register struct mbuf *mp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, i, tlen, len, getret; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; struct vnode *vp; struct vattr attr; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; u_quad_t frev; #ifndef nolint mp2 = mp3 = (struct mbuf *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); len = 0; i = 0; while (len < NFS_MAXPATHLEN) { MGET(mp, M_WAIT, MT_DATA); MCLGET(mp, M_WAIT); mp->m_len = NFSMSIZ(mp); if (len == 0) mp3 = mp2 = mp; else { mp2->m_next = mp; mp2 = mp; } if ((len+mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = NFS_MAXPATHLEN-len; len = NFS_MAXPATHLEN; } else len += mp->m_len; ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { m_freem(mp3); nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VLNK) { if (v3) error = EINVAL; else error = ENXIO; goto out; } nqsrv_getl(vp, ND_READ); error = VOP_READLINK(vp, uiop, cred); out: getret = VOP_GETATTR(vp, &attr, cred, procp); vput(vp); if (error) m_freem(mp3); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &attr); if (error) return (0); } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = nfsm_rndup(len); nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); mb->m_next = mp3; nfsm_srvdone; } /* * nfs read service */ int nfsrv_read(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *iv; struct iovec *iv2; register struct mbuf *m; register struct nfs_fattr *fp; register u_long *tl; 
register long t1; register int i; caddr_t bpos; int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; char *cp2; struct mbuf *mb, *mb2, *mreq; struct mbuf *m2; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; struct vattr va, *vap = &va; off_t off; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *tl); } nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_READ); if (error = nfsrv_access(vp, VREAD, cred, rdonly, procp)) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } getret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = getret; if (error) { vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } if (off >= vap->va_size) cnt = 0; else if ((off + reqlen) > vap->va_size) cnt = nfsm_rndup(vap->va_size - off); else cnt = reqlen; nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); if (v3) { nfsm_build(tl, u_long *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; tl += (NFSX_V3FATTR / sizeof (u_long)); } else { nfsm_build(tl, u_long *, NFSX_V2FATTR + NFSX_UNSIGNED); fp = (struct nfs_fattr *)tl; tl += (NFSX_V2FATTR / sizeof (u_long)); } len = left = cnt; if (cnt > 0) { /* * Generate the mbuf list with the uio_iov ref. to it. 
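 *
 * The two passes below first size the mbuf chain and then point one
 * iovec at the trailing space of each mbuf.  As a hypothetical
 * example, a 2000-byte read where the reply mbuf mb still has 800
 * bytes of trailing space ends up as two iovecs: 800 bytes in mb and
 * 1200 bytes in a freshly allocated cluster mbuf, and the single
 * VOP_READ() below then fills both through the uio.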
*/ i = 0; m = m2 = mb; while (left > 0) { siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { left -= siz; i++; } if (left > 0) { MGET(m, M_WAIT, MT_DATA); MCLGET(m, M_WAIT); m->m_len = 0; m2->m_next = m; m2 = m; } } MALLOC(iv, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = mb; left = cnt; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = cnt; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); off = uiop->uio_offset; FREE((caddr_t)iv2, M_TEMP); if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { if (!error) error = getret; m_freem(mreq); vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } } else uiop->uio_resid = 0; vput(vp); nfsm_srvfillattr(vap, fp); len -= uiop->uio_resid; tlen = nfsm_rndup(len); if (cnt != tlen || tlen != len) nfsm_adj(mb, cnt - tlen, tlen - len); if (v3) { *tl++ = txdr_unsigned(len); if (len < reqlen) *tl++ = nfs_true; else *tl++ = nfs_false; } *tl = txdr_unsigned(len); nfsm_srvdone; } /* * nfs write service */ int nfsrv_write(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *ivp; register int i, cnt; register struct mbuf *mp; register struct nfs_fattr *fp; struct iovec *iv; struct vattr va, forat; register struct vattr *vap = &va; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, retlen, zeroing, adjust; int stable = NFSV3WRITE_FILESYNC; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; off_t off; u_quad_t frev; if (mrep == NULL) { *mrq = NULL; return (0); } fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); tl += 3; stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) stable = NFSV3WRITE_UNSTABLE; } retlen = len = fxdr_unsigned(long, *tl); cnt = i = 0; /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. 
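 *
 * The pass over the request chain below isolates just the write
 * payload: mbufs in front of the one holding the current dissect
 * position (md) are header remnants and get their lengths zeroed,
 * the mbuf at md is trimmed by "adjust" bytes so it begins at the
 * data, and once the running total of data bytes reaches len any
 * excess in the tail is clipped and the remaining mbufs are zeroed
 * as well.  cnt ends up as the number of mbufs still carrying data,
 * which sizes the iovec array allocated further down.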
*/ if (len > 0) { zeroing = 1; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else if (mp->m_len > 0) { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } if (mp->m_len > 0) cnt++; } mp = mp->m_next; } } if (len > NFS_MAXDATA || len < 0 || i < len) { error = EIO; nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (error) { vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (len > 0) { MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } /* * XXX * The IO_METASYNC flag indicates that all metadata (and not just * enough to ensure data integrity) mus be written to stable storage * synchronously. * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) */ if (stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_resid = len; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = off; error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; FREE((caddr_t)iv, M_TEMP); } aftat_ret = VOP_GETATTR(vp, vap, cred, procp); vput(vp); if (!error) error = aftat_ret; nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); if (error) return (0); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. */ if (stable == NFSV3WRITE_UNSTABLE && !nfs_async) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * NFS write service with write gathering support. Called when * nfsrvw_procrastinate > 0. * See: Chet Juszczak, "Improving the Write Performance of an NFS Server", * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco, * Jan. 1994. 
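 *
 * The idea, in brief: instead of issuing one VOP_WRITE() per WRITE
 * rpc, each request is parked on a per-socket queue for a short
 * procrastination interval, and requests to the same file handle
 * whose byte ranges abut or overlap are merged while they wait.  So,
 * for example, three back-to-back 8192-byte writes at offsets 0,
 * 8192 and 16384 can be coalesced into a single 24576-byte
 * VOP_WRITE(), after which one reply is still built for each of the
 * original rpcs.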
*/ int nfsrv_writegather(ndp, slp, procp, mrq) struct nfsrv_descript **ndp; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { register struct iovec *ivp; register struct mbuf *mp; register struct nfsrv_descript *wp, *nfsd, *owp, *swp; register struct nfs_fattr *fp; register int i; struct iovec *iov; struct nfsrvw_delayhash *wpp; struct ucred *cred; struct vattr va, forat; register u_long *tl; register long t1; caddr_t bpos, dpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; char *cp2; struct mbuf *mb, *mb2, *mreq, *mrep, *md; struct vnode *vp; struct uio io, *uiop = &io; u_quad_t frev, cur_usec; #ifndef nolint i = 0; len = 0; #endif *mrq = NULL; if (*ndp) { nfsd = *ndp; *ndp = NULL; mrep = nfsd->nd_mrep; md = nfsd->nd_md; dpos = nfsd->nd_dpos; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); LIST_INIT(&nfsd->nd_coalesce); nfsd->nd_mreq = NULL; nfsd->nd_stable = NFSV3WRITE_FILESYNC; cur_usec = nfs_curusec(); nfsd->nd_time = cur_usec + (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate); /* * Now, get the write header.. */ nfsm_srvmtofh(&nfsd->nd_fh); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &nfsd->nd_off); tl += 3; nfsd->nd_stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); nfsd->nd_off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) nfsd->nd_stable = NFSV3WRITE_UNSTABLE; } len = fxdr_unsigned(long, *tl); nfsd->nd_len = len; nfsd->nd_eoff = nfsd->nd_off + len; /* * Trim the header out of the mbuf list and trim off any trailing * junk so that the mbuf list has only the write data. */ zeroing = 1; i = 0; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } } mp = mp->m_next; } if (len > NFS_MAXDATA || len < 0 || i < len) { nfsmout: m_freem(mrep); error = EIO; nfsm_writereply(2 * NFSX_UNSIGNED, v3); if (v3) nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsd->nd_mreq = mreq; nfsd->nd_mrep = NULL; nfsd->nd_time = 0; } /* * Add this entry to the hash and time queues. */ s = splsoftclock(); owp = NULL; wp = slp->ns_tq.lh_first; while (wp && wp->nd_time < nfsd->nd_time) { owp = wp; wp = wp->nd_tq.le_next; } NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff)); if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_tq); } else { LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } if (nfsd->nd_mrep) { wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); owp = NULL; wp = wpp->lh_first; while (wp && bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } while (wp && wp->nd_off < nfsd->nd_off && !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_hash); /* * Search the hash list for overlapping entries and * coalesce. */ for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) { wp = nfsd->nd_hash.le_next; if (NFSW_SAMECRED(owp, nfsd)) nfsrvw_coalesce(owp, nfsd); } } else { LIST_INSERT_HEAD(wpp, nfsd, nd_hash); } } splx(s); } /* * Now, do VOP_WRITE()s for any one(s) that need to be done now * and generate the associated reply mbuf list(s). 
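 *
 * The gathering path above parks each WRITE on a time-ordered queue for a
 * short while (nfsrvw_procrastinate) and, via nfsrvw_coalesce() further
 * down, folds contiguous or overlapping requests from the same credential
 * into one larger request before the loop below issues a single
 * VOP_WRITE() for the lot.  A sketch of just the range-merge rule, with
 * hypothetical types, ignoring the mbuf splicing and credential
 * comparison that the real routine also performs:
 *
 *	struct pending_write {
 *		off_t	off;
 *		off_t	eoff;
 *	};
 *
 *	static int
 *	coalesce_range(struct pending_write *owp, const struct pending_write *req)
 *	{
 *		if (req->off < owp->off || req->off > owp->eoff)
 *			return (0);
 *		if (req->eoff > owp->eoff)
 *			owp->eoff = req->eoff;
 *		return (1);
 *	}
 *
 * A request that starts past the current end offset stays queued; one
 * that starts at or inside the merged range only ever extends the end,
 * which mirrors how nd_eoff is updated (and the stability level upgraded
 * to the strongest requested) when requests are merged below.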
*/ loop1: cur_usec = nfs_curusec(); s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { owp = nfsd->nd_tq.le_next; if (nfsd->nd_time > cur_usec) break; if (nfsd->nd_mreq) continue; NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_tq); LIST_REMOVE(nfsd, nd_hash); splx(s); mrep = nfsd->nd_mrep; nfsd->nd_mrep = NULL; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); forat_ret = aftat_ret = 1; error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); if (!error) { if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } } else vp = NULL; if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = nfsd->nd_off; uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; if (uiop->uio_resid > 0) { mp = mrep; i = 0; while (mp) { if (mp->m_len > 0) i++; mp = mp->m_next; } uiop->uio_iovcnt = i; MALLOC(iov, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = ivp = iov; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } if (!error) { error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; } FREE((caddr_t)iov, M_TEMP); } m_freem(mrep); if (vp) { aftat_ret = VOP_GETATTR(vp, &va, cred, procp); vput(vp); } /* * Loop around generating replies for all write rpcs that have * now been completed. */ swp = nfsd; do { NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff)); if (error) { nfsm_writereply(NFSX_WCCDATA(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); } } else { nfsm_writereply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsd->nd_len); *tl++ = txdr_unsigned(swp->nd_stable); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(&va, fp); } } nfsd->nd_mreq = mreq; if (nfsd->nd_mrep) panic("nfsrv_write: nd_mrep not free"); /* * Done. Put it at the head of the timer queue so that * the final phase can return the reply. */ s = splsoftclock(); if (nfsd != swp) { nfsd->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } nfsd = swp->nd_coalesce.lh_first; if (nfsd) { LIST_REMOVE(nfsd, nd_tq); } splx(s); } while (nfsd); s = splsoftclock(); swp->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); splx(s); goto loop1; } splx(s); /* * Search for a reply to return. */ s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) if (nfsd->nd_mreq) { NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_tq); *mrq = nfsd->nd_mreq; *ndp = nfsd; break; } splx(s); return (0); } /* * Coalesce the write request nfsd into owp. 
To do this we must: * - remove nfsd from the queues * - merge nfsd->nd_mrep into owp->nd_mrep * - update the nd_eoff and nd_stable for owp * - put nfsd on owp's nd_coalesce list * NB: Must be called at splsoftclock(). */ static void nfsrvw_coalesce(owp, nfsd) register struct nfsrv_descript *owp; register struct nfsrv_descript *nfsd; { register int overlap; register struct mbuf *mp; struct nfsrv_descript *p; NFS_DPF(WG, ("C%03x-%03x", nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_hash); LIST_REMOVE(nfsd, nd_tq); if (owp->nd_eoff < nfsd->nd_eoff) { overlap = owp->nd_eoff - nfsd->nd_off; if (overlap < 0) panic("nfsrv_coalesce: bad off"); if (overlap > 0) m_adj(nfsd->nd_mrep, overlap); mp = owp->nd_mrep; while (mp->m_next) mp = mp->m_next; mp->m_next = nfsd->nd_mrep; owp->nd_eoff = nfsd->nd_eoff; } else m_freem(nfsd->nd_mrep); nfsd->nd_mrep = NULL; if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) owp->nd_stable = NFSV3WRITE_FILESYNC; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC && owp->nd_stable == NFSV3WRITE_UNSTABLE) owp->nd_stable = NFSV3WRITE_DATASYNC; LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq); /* * If nfsd had anything else coalesced into it, transfer them * to owp, otherwise their replies will never get sent. */ for (p = nfsd->nd_coalesce.lh_first; p; p = nfsd->nd_coalesce.lh_first) { LIST_REMOVE(p, nd_tq); LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq); } } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. */ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } /* * nfs create service * now does a truncate to 0 length via. 
setattr if it already exists */ int nfsrv_create(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register u_long *tl; struct nameidata nd; register caddr_t cp; register long t1; caddr_t bpos; int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev, tempsize; u_char cverf[NFSX_V3CREATEVERF]; #ifndef nolint rdev = 0; #endif nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSV3CREATE_GUARDED: if (nd.ni_vp) { error = EEXIST; break; } case NFSV3CREATE_UNCHECKED: nfsm_srvsattr(vap); break; case NFSV3CREATE_EXCLUSIVE: nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); bcopy(cp, cverf, NFSX_V3CREATEVERF); exclusive_flag = 1; if (nd.ni_vp == NULL) vap->va_mode = 0; break; }; vap->va_type = VREG; } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); if (vap->va_type == VNON) vap->va_type = VREG; vap->va_mode = nfstov_mode(sp->sa_mode); switch (vap->va_type) { case VREG: tsize = fxdr_unsigned(long, sp->sa_size); if (tsize != -1) vap->va_size = (u_quad_t)tsize; break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(long, sp->sa_size); break; }; } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ?? 
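 *
 * For an NFSv3 exclusive create, the code below stashes the client's
 * 8-byte create verifier in the new file's atime (the bcopy() into
 * vap->va_atime) and, once the file exists, compares the stored value
 * against the verifier in the request: a mismatch on an apparently
 * existing file yields EEXIST, while a match means the CREATE is a
 * retransmission whose reply was lost and should simply succeed.  A
 * small sketch of that final check, assuming hypothetical names:
 *
 *	#include <string.h>
 *
 *	#define CREATEVERF_SIZE	8
 *
 *	static int
 *	exclusive_create_is_retransmission(const char *stored_verf,
 *	    const char *request_verf)
 *	{
 *		return (memcmp(stored_verf, request_verf, CREATEVERF_SIZE) == 0);
 *	}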
*/ if (nd.ni_vp == NULL) { if (vap->va_type == VREG || vap->va_type == VSOCK) { vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); if (!error) { nfsrv_object_create(nd.ni_vp); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); if (exclusive_flag) { exclusive_flag = 0; VATTR_NULL(vap); bcopy(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF); error = VOP_SETATTR(nd.ni_vp, vap, cred, procp); } } } else if (vap->va_type == VCHR || vap->va_type == VBLK || vap->va_type == VFIFO) { if (vap->va_type == VCHR && rdev == 0xffffffff) vap->va_type = VFIFO; if (vap->va_type != VFIFO && (error = suser(cred, (u_short *)0))) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); nfsm_reply(0); return (error); } else vap->va_rdev = (dev_t)rdev; nqsrv_getl(nd.ni_dvp, ND_WRITE); - if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); + if (error) { vrele(nd.ni_startdir); nfsm_reply(0); } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; if (error = lookup(&nd)) { zfree(namei_zone, nd.ni_cnd.cn_pnbuf); nfsm_reply(0); } nfsrv_object_create(nd.ni_vp); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); if (nd.ni_cnd.cn_flags & ISSYMLINK) { vrele(nd.ni_dvp); vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; nfsm_reply(0); } } else { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); error = ENXIO; } vp = nd.ni_vp; } else { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); vp = nd.ni_vp; if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (vap->va_size != -1) { error = nfsrv_access(vp, VWRITE, cred, (nd.ni_cnd.cn_flags & RDONLY), procp); if (!error) { nqsrv_getl(vp, ND_WRITE); tempsize = vap->va_size; VATTR_NULL(vap); vap->va_size = tempsize; error = VOP_SETATTR(vp, vap, cred, procp); } if (error) vput(vp); } } if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); } if (v3) { if (exclusive_flag && !error && bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) error = EEXIST; diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) vrele(dirp); if (nd.ni_cnd.cn_nameiop) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vput(nd.ni_vp); return (error); } /* * nfs v3 mknod service */ int nfsrv_mknod(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register u_long *tl; struct 
nameidata nd; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; u_long major, minor; enum vtype vtyp; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); if (error) { nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); return (0); } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vtyp = nfsv3tov_type(*tl); if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); error = NFSERR_BADTYPE; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); goto out; } VATTR_NULL(vap); nfsm_srvsattr(vap); if (vtyp == VCHR || vtyp == VBLK) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_long, *tl++); minor = fxdr_unsigned(u_long, *tl); vap->va_rdev = makedev(major, minor); } /* * Iff doesn't exist, create it. */ if (nd.ni_vp) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); error = EEXIST; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); goto out; } vap->va_type = vtyp; if (vtyp == VSOCK) { vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); if (!error) zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } else { if (vtyp != VFIFO && (error = suser(cred, (u_short *)0))) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); - if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); + if (error) { vrele(nd.ni_startdir); goto out; } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = procp->p_ucred; error = lookup(&nd); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); if (error) goto out; if (nd.ni_cnd.cn_flags & ISSYMLINK) { vrele(nd.ni_dvp); vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; } } out: vp = nd.ni_vp; if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); } diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); nfsmout: if (dirp) vrele(dirp); if (nd.ni_cnd.cn_nameiop) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vput(nd.ni_vp); return (error); } /* * nfs remove service */ int nfsrv_remove(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = 
nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint vp = (struct vnode *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else vrele(dirp); } if (!error) { vp = nd.ni_vp; if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) { error = EBUSY; goto out; } out: if (!error) { nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); } + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP); + vput(vp); } if (dirp && v3) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs rename service */ int nfsrv_rename(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct nameidata fromnd, tond; struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; struct vnode *tdirp = (struct vnode *)0; struct vattr fdirfor, fdiraft, tdirfor, tdiraft; nfsfh_t fnfh, tnfh; fhandle_t *ffhp, *tfhp; u_quad_t frev; uid_t saved_uid; #ifndef nolint fvp = (struct vnode *)0; #endif ffhp = &fnfh.fh_generic; tfhp = &tnfh.fh_generic; fromnd.ni_cnd.cn_nameiop = 0; tond.ni_cnd.cn_nameiop = 0; nfsm_srvmtofh(ffhp); nfsm_srvnamesiz(len); /* * Remember our original uid so that we can reset cr_uid before * the second nfs_namei() call, in case it is remapped. 
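 *
 * RENAME is the one request that runs nfs_namei() twice, and each lookup
 * may remap the credential according to the export options (for example,
 * squashing root to the anonymous uid).  Saving cr_uid here and restoring
 * it before the second lookup keeps both lookups starting from the
 * identity the client actually presented.  A sketch of the pattern,
 * assuming struct ucred and a hypothetical lookup callback:
 *
 *	typedef int lookup_fn(struct ucred *cred, const char *name);
 *
 *	static int
 *	lookup_pair(struct ucred *cred, lookup_fn *look, const char *from,
 *	    const char *to)
 *	{
 *		uid_t saved_uid = cred->cr_uid;
 *		int error;
 *
 *		error = look(cred, from);
 *		if (error)
 *			return (error);
 *		cred->cr_uid = saved_uid;
 *		return (look(cred, to));
 *	}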
*/ saved_uid = cred->cr_uid; fromnd.ni_cnd.cn_cred = cred; fromnd.ni_cnd.cn_nameiop = DELETE; fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (fdirp) { if (v3) fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, procp); else { vrele(fdirp); fdirp = (struct vnode *)0; } } if (error) { nfsm_reply(2 * NFSX_WCCDATA(v3)); nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); if (fdirp) vrele(fdirp); return (0); } fvp = fromnd.ni_vp; nfsm_srvmtofh(tfhp); nfsm_strsiz(len2, NFS_MAXNAMLEN); cred->cr_uid = saved_uid; tond.ni_cnd.cn_cred = cred; tond.ni_cnd.cn_nameiop = RENAME; tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (tdirp) { if (v3) tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, procp); else { vrele(tdirp); tdirp = (struct vnode *)0; } } if (error) { VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { if (v3) error = EEXIST; else error = EISDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { if (v3) error = EEXIST; else error = ENOTDIR; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp->v_mount != tdvp->v_mount) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp == tdvp) if (v3) error = EINVAL; else error = ENOTEMPTY; /* * If source is the same as the destination (that is the * same vnode with the same name in the same directory), * then there is nothing to do. 
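 * When that is the case, the check below (same vnode, same parent
 * directory, same name bytes) sets error to -1, an internal marker
 * meaning "skip the VOP_RENAME() but still report success"; the common
 * cleanup path later converts that -1 back to 0.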
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { nqsrv_getl(fromnd.ni_dvp, ND_WRITE); nqsrv_getl(tdvp, ND_WRITE); if (tvp) { nqsrv_getl(tvp, ND_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); if (error == -1) error = 0; } vrele(tond.ni_startdir); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fdirp) { fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); vrele(fdirp); } if (tdirp) { tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); vrele(tdirp); } vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } return (0); nfsmout: if (fdirp) vrele(fdirp); if (tdirp) vrele(tdirp); if (tond.ni_cnd.cn_nameiop) { vrele(tond.ni_startdir); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); } if (fromnd.ni_cnd.cn_nameiop) { vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } return (error); } /* * nfs link service */ int nfsrv_link(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *xp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft, at; nfsfh_t nfh, dnfh; fhandle_t *fhp, *dfhp; u_quad_t frev; fhp = &nfh.fh_generic; dfhp = &dnfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ goto out1; } nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out1; xp = nd.ni_vp; if (xp != NULL) { error = EEXIST; goto out; } xp = nd.ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; out: if (!error) { nqsrv_getl(vp, ND_WRITE); nqsrv_getl(xp, ND_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + vput(nd.ni_dvp); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); } out1: if (v3) getret = VOP_GETATTR(vp, &at, cred, procp); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } vrele(vp); 
nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs symbolic link service */ int nfsrv_symlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; struct nameidata nd; register struct vattr *vap = &va; register u_long *tl; register long t1; struct nfsv2_sattr *sp; char *bpos, *pathcp = (char *)0, *cp2; struct uio io; struct iovec iv; int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); struct mbuf *mb, *mreq, *mb2; struct vnode *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out; VATTR_NULL(vap); if (v3) nfsm_srvsattr(vap); nfsm_strsiz(len2, NFS_MAXPATHLEN); MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); iv.iov_base = pathcp; iv.iov_len = len2; io.uio_resid = len2; io.uio_offset = 0; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; nfsm_mtouio(&io, len2); if (!v3) { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode); } *(pathcp + len2) = '\0'; if (nd.ni_vp) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); + vput(nd.ni_dvp); if (error) vrele(nd.ni_startdir); else { if (v3) { nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART | FOLLOW); nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; error = lookup(&nd); if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(nd.ni_vp, vap, cred, procp); vput(nd.ni_vp); } } else vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } out: if (pathcp) FREE(pathcp, M_TEMP); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } return (0); nfsmout: if (nd.ni_cnd.cn_nameiop) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } if (dirp) vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); if (pathcp) FREE(pathcp, M_TEMP); return (error); } /* * nfs mkdir service */ int nfsrv_mkdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct 
nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfs_fattr *fp; struct nameidata nd; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vap->va_mode = nfstov_mode(*tl++); } vap->va_type = VDIR; vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); if (!error) { vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); } out: if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (error); } /* * nfs rmdir service */ int nfsrv_rmdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; struct nameidata nd; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if 
(dirp) vrele(dirp); return (0); } vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); } + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The nfsm_clget macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it only knows that it has encountered eof when the VOP_READDIR() * reads nothing * - as such one readdir rpc will return eof false although you are there * and then the next will return eof * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * NB: It is tempting to set eof to true if the VOP_READDIR() reads less * than requested, but this may not apply to all filesystems. For * example, client NFS does not { although it is never remote mounted * anyhow } * The alternate call nfsrv_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. 
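 *
 * Seen from the client, the cookies described above drive a simple
 * resumption loop: start at cookie 0, remember the cookie carried by the
 * last entry of each reply, and keep issuing READDIR until the server
 * reports eof (which, as noted, may cost one extra empty round trip).  A
 * sketch of that loop, where readdir_rpc() is a hypothetical wrapper
 * around the actual RPC and consuming the names is elided:
 *
 *	struct rd_entry {
 *		u_long	cookie;
 *		char	name[256];
 *	};
 *
 *	int readdir_rpc(u_long cookie, struct rd_entry *ent, int max, int *eof);
 *
 *	static void
 *	list_directory(void)
 *	{
 *		struct rd_entry ent[64];
 *		u_long cookie = 0;
 *		int i, n, eof = 0;
 *
 *		while (!eof) {
 *			n = readdir_rpc(cookie, ent, 64, &eof);
 *			if (n <= 0)
 *				break;
 *			for (i = 0; i < n; i++)
 *				cookie = ent[i].cookie;
 *		}
 *	}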
*/ struct flrep { nfsuint64 fl_off; u_long fl_postopok; u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; u_long fl_fhok; u_long fl_fhsize; u_long fl_nfh[NFSX_V3FH / sizeof (u_long)]; }; int nfsrv_readdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct uio io; struct iovec iv; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies; int v3 = (nfsd->nd_flag & ND_NFSV3); u_quad_t frev, off, toff, verf; u_long *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); toff = fxdr_unsigned(u_quad_t, *tl++); } off = toff; cnt = fxdr_unsigned(int, *tl); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } nqsrv_getl(vp, ND_READ); if (v3) { error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; } if (!error) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); if (error) { vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp, 0, procp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (off_t)io.uio_offset; if (!cookies && !error) error = NFSERR_PERM; if (v3) { getret = VOP_GETATTR(vp, &at, cred, procp); if (!error) error = getret; } VOP_UNLOCK(vp, 0, procp); if (error) { vrele(vp); free((caddr_t)rbuf, M_TEMP); if (cookies) free((caddr_t)cookies, M_TEMP); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vrele(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + 2 * NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; } else nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. 
* If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that preceed the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); } mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; len += (4 * NFSX_UNSIGNED + nlen + rem); if (v3) len += 2 * NFSX_UNSIGNED; if (len > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; if (v3) { nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; } nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp+xfer) > be) tsiz = be-bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; nfsm_clget; /* Finish off the record */ if (v3) { *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; } *tl = txdr_unsigned(*cookiep); bp += NFSX_UNSIGNED; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); nfsm_srvdone; } int nfsrv_readdirplus(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp, *nvp; struct flrep fl; nfsfh_t nfh; fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; struct uio io; struct iovec iv; struct vattr va, at, *vap = &va; struct nfs_fattr *fp; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies; u_quad_t frev, off, toff, verf; u_long *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); off = toff; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > 
xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; if (!error) { nqsrv_getl(vp, ND_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } if (error) { vput(vp); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp, 0, procp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (u_quad_t)io.uio_offset; getret = VOP_GETATTR(vp, &at, cred, procp); VOP_UNLOCK(vp, 0, procp); if (!cookies && !error) error = NFSERR_PERM; if (!error) error = getret; if (error) { vrele(vp); if (cookies) free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vrele(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that preceed the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } /* * Probe one of the directory entries to see if the filesystem * supports VGET. */ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) { error = NFSERR_NOTSUPP; vrele(vp); free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } vput(nvp); dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; nfsm_reply(cnt); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; /* * For readdir_and_lookup get the vnode using * the file number. 
*/ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp)) goto invalid; bzero((caddr_t)nfhp, NFSX_V3FH); nfhp->fh_fsid = nvp->v_mount->mnt_stat.f_fsid; if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { vput(nvp); goto invalid; } if (VOP_GETATTR(nvp, vap, cred, procp)) { vput(nvp); goto invalid; } vput(nvp); /* * If either the dircount or maxcount will be * exceeded, get out now. Both of these lengths * are calculated conservatively, including all * XDR overheads. */ len += (7 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + NFSX_V3POSTOPATTR); dirlen += (6 * NFSX_UNSIGNED + nlen + rem); if (len > cnt || dirlen > fullsiz) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ fp = (struct nfs_fattr *)&fl.fl_fattr; nfsm_srvfillattr(vap, fp); fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; fl.fl_off.nfsuquad[0] = 0; fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; /* * Now copy the flrep structure out. */ xfer = sizeof (struct flrep); cp = (caddr_t)&fl; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); nfsm_srvdone; } /* * nfs commit service */ int nfsrv_commit(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr bfor, aft; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev, off; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). 
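 *
 * The write verifier returned below (and in the v3 WRITE reply earlier)
 * is simply boottime, so its value changes exactly when the server
 * reboots and loses any data it had only accepted UNSTABLE.  A
 * client-side sketch of the resulting rule, with hypothetical names:
 *
 *	#include <string.h>
 *
 *	#define WRITEVERF_SIZE	8
 *
 *	static int
 *	must_rewrite_after_commit(const char *write_verf, const char *commit_verf)
 *	{
 *		return (memcmp(write_verf, commit_verf, WRITEVERF_SIZE) != 0);
 *	}
 *
 * If the verifier seen at COMMIT time differs from the one returned when
 * the data was written, the client must transmit that data again.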
*/ fxdr_hyper(tl, &off); tl += 2; cnt = fxdr_unsigned(int, *tl); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); return (0); } for_ret = VOP_GETATTR(vp, &bfor, cred, procp); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { vm_object_page_clean(vp->v_object, 0, 0, TRUE); } error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); aft_ret = VOP_GETATTR(vp, &aft, cred, procp); vput(vp); nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); if (!error) { nfsm_build(tl, u_long *, NFSX_V3WRITEVERF); *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else return (0); nfsm_srvdone; } /* * nfs statfs service */ int nfsrv_statfs(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct statfs *sf; register struct nfs_statfs *sfp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct statfs statfs; u_quad_t frev, tval; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } sf = &statfs; error = VFS_STATFS(vp->v_mount, sf, procp); getret = VOP_GETATTR(vp, &at, cred, procp); vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); if (v3) { tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_tbytes); tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_fbytes); tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_abytes); sfp->sf_tfiles.nfsuquad[0] = 0; sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); sfp->sf_ffiles.nfsuquad[0] = 0; sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_afiles.nfsuquad[0] = 0; sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_invarsec = 0; } else { sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); sfp->sf_bsize = txdr_unsigned(sf->f_bsize); sfp->sf_blocks = txdr_unsigned(sf->f_blocks); sfp->sf_bfree = txdr_unsigned(sf->f_bfree); sfp->sf_bavail = txdr_unsigned(sf->f_bavail); } nfsm_srvdone; } /* * nfs fsinfo service */ int nfsrv_fsinfo(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_fsinfo *sip; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, pref; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, 
(nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } getret = VOP_GETATTR(vp, &at, cred, procp); vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); nfsm_srvpostop_attr(getret, &at); nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (slp->ns_so->so_type == SOCK_DGRAM) pref = NFS_MAXDGRAMDATA; else pref = NFS_MAXDATA; sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_rtpref = txdr_unsigned(pref); sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_wtpref = txdr_unsigned(pref); sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_dtpref = txdr_unsigned(pref); sip->fs_maxfilesize.nfsuquad[0] = 0xffffffff; sip->fs_maxfilesize.nfsuquad[1] = 0xffffffff; sip->fs_timedelta.nfsv3_sec = 0; sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1); sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); nfsm_srvdone; } /* * nfs pathconf service */ int nfsrv_pathconf(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_pathconf *pc; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, linkmax, namemax; int chownres, notrunc; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); if (!error) error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); if (!error) error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); if (!error) error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc); getret = VOP_GETATTR(vp, &at, cred, procp); vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. 
*/ pc->pc_caseinsensitive = nfs_false; pc->pc_casepreserving = nfs_true; nfsm_srvdone; } /* * Null operation, used by clients to ping server */ /* ARGSUSED */ int nfsrv_null(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error = NFSERR_RETVOID, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif nfsm_reply(0); return (0); } /* * No operation, used for obsolete procedures */ /* ARGSUSED */ int nfsrv_noop(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif if (nfsd->nd_repstat) error = nfsd->nd_repstat; else error = EPROCUNAVAIL; nfsm_reply(0); return (0); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESS() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case * 2 - The owner is to be given access irrespective of mode bits so that * processes that chmod after opening a file don't break. I don't like * this because it opens a security hole, but since the nfs server opens * a security hole the size of a barn door anyhow, what the heck. */ static int nfsrv_access(vp, flags, cred, rdonly, p) register struct vnode *vp; int flags; register struct ucred *cred; int rdonly; struct proc *p; { struct vattr vattr; int error; if (flags & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If there's shared text associated with * the inode, we can't allow writing. */ if (vp->v_flag & VTEXT) return (ETXTBSY); } if (error = VOP_GETATTR(vp, &vattr, cred, p)) return (error); if ((error = VOP_ACCESS(vp, flags, cred, p)) && cred->cr_uid != vattr.va_uid) return (error); return (0); } #endif /* NFS_NOSERVER */ Index: head/sys/nfs/nfs_vnops.c =================================================================== --- head/sys/nfs/nfs_vnops.c (revision 35822) +++ head/sys/nfs/nfs_vnops.c (revision 35823) @@ -1,3304 +1,3284 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.82 1998/03/28 12:04:40 bde Exp $ + * $Id: nfs_vnops.c,v 1.83 1998/03/30 09:54:32 phk Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. */ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int 
nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) nfs_rename }, { &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct 
vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, int fnamelen, struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register u_long *tl; register caddr_t cp; register int t1, t2; caddr_t bpos, dpos, cp2; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long mode, rmode; int v3 = NFS_ISV3(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * For nfs v3, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about, but * this is better than just returning anything that is lying about * in the cache. */ if (v3) { nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type == VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } *tl = txdr_unsigned(mode); nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred); nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_long, *tl); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if ((rmode & mode) != mode) error = EACCES; } nfsm_reqdone; return (error); } else { if (error = nfsspec_access(ap)) return (error); /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. 
* After calling nfsspec_access, we should have the correct * file size cached. */ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = ap->a_p; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { printf("open eacces vtyp=%d\n",vp->v_type); return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) { return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client.
* (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. */ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_long *tl; register int t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) nfsm_loadattr(vp, ap->a_vap); nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (u_short)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. 
*/ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) return (error); } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, (u_long)np->n_size); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { if (vap->va_mode != (u_short)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_uid != (uid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_gid != (gid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_size != VNOVAL) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&vap->va_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != time_second) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != time_second) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (u_short)VNOVAL) sp->sa_mode = VNOVAL; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = VNOVAL; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = VNOVAL; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, 
procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; register int flags = cnp->cn_flags; register struct vnode *newvp; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if (error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EPERM); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
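 * A short user-level sketch (illustrative only, hypothetical pathname) of
 * the chain that ends up here: readlink(2) goes through nfs_readlink() and
 * nfs_bioread(), and nfs_doio() finally issues this rpc.
 *
 *	char buf[PATH_MAX];
 *	ssize_t n;
 *
 *	n = readlink("/nfs/export/alink", buf, sizeof(buf) - 1);
 *	if (n != -1)
 *		buf[n] = '\0';
 *
 * (readlink(2) does not NUL-terminate, hence the explicit terminator.)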
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(&uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); txdr_hyper(&uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); } else { nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *++tl = txdr_unsigned(uiop->uio_offset); tl += 2; } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. 
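 * A rough sketch of the v2 encoding described above for a hypothetical
 * character device (major/minor numbers made up).  The IFCHR type travels
 * in the IFMT bits of sa_mode and the device number rides in sa_size,
 * exactly as the !v3 branch below does it:
 *
 *	rdev = txdr_unsigned(makedev(12, 0));
 *	sp->sa_mode = vtonfsv2_mode(VCHR, 0600);
 *	sp->sa_size = rdev;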
*/ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (EOPNOTSUPP); } if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); *tl++ = vtonfsv3_type(vap->va_type); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - vput(dvp); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
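 * For illustration (hypothetical path and device numbers): a mknod(2) such
 * as the one below is what drives this vop.  The vnode created by the rpc
 * is released again right away (see the vput() below), since VOP_MKNOD
 * callers do not expect a vnode back.
 *
 *	mknod("/nfs/export/fakedev", S_IFCHR | 0600, makedev(5, 1));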
*/ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_long *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - vput(dvp); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
* - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vp->v_usecount < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_usecount == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); zfree(namei_zone, cnp->cn_pnbuf); np->n_attrstamp = 0; - vput(dvp); - if (vp == dvp) - vrele(vp); - else - vput(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(sp) register struct sillyrename *sp; { return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, (struct proc *)0)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 
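 * Rough sketch (nfs_inactive itself is outside this hunk) of how the
 * deferred half of a sillyrename eventually lands here: the saved record
 * carries the directory vnode, name and credentials, so the remove can be
 * replayed without a struct componentname.
 *
 *	struct sillyrename *sp = np->n_sillyrename;
 *	if (sp != NULL) {
 *		(void) nfs_removeit(sp);
 *		crfree(sp->s_cred);
 *		vrele(sp->s_dvp);
 *		free((caddr_t)sp, M_NFSREQ);
 *	}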
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
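 * Illustration only (hypothetical pathnames): an ordinary rename(2) over an
 * NFS mount ends up in this rpc.  If the first reply is lost the server has
 * already renamed the file, so the retransmitted request answers ENOENT;
 * that is why nfs_rename() above maps ENOENT to success.
 *
 *	rename("/nfs/export/tmpfile", "/nfs/export/realfile");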
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; char *fnameptr; int fnamelen; register struct vnode *tdvp; char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); - if (tdvp == vp) - vrele(tdvp); - else - vput(tdvp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; - vput(tdvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, cnp->cn_cred->cr_gid); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); - vput(vp); - vput(dvp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
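 * Condensed restatement of the repacking arithmetic used in the loop below
 * (orientation only); each wire entry whose name is len bytes long becomes
 * a 4.4BSD dirent:
 *
 *	tlen = nfsm_rndup(len);
 *	if (tlen == len)
 *		tlen += 4;
 *	reclen = tlen + DIRHDSIZ;
 *
 * i.e. the name is rounded up to a 4 byte boundary with room guaranteed for
 * the terminating NUL, plus the fixed dirent header.  Whenever reclen will
 * not fit in what is left of the current DIRBLKSIZ block, the previous
 * entry's d_reclen is grown to pad out to the block boundary.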
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
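 * For orientation, a conceptual (illustration only, not literal XDR) layout
 * of each reply entry handled below:
 *
 *	struct entryplus3 {
 *		fileid, name, cookie,
 *		name_attributes,	post-op attributes, optional
 *		name_handle		file handle, optional
 *	};
 *
 * Because the attributes come before the handle, the code below has to skip
 * ahead to find the handle and then back up to load the attributes.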
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get 
them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { if (error = nfs_nget(vp->v_mount, fhp, fhsize, &np)) doit = 0; else newvp = NFSTOV(np); } } if (doit) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp * i; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { vrele(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } if (error = nfs_renameit(dvp, cnp, sp)) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. 
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) if (newvp == dvp) vrele(newvp); else vput(newvp); } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_long *tl; register int t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). 
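 *
 * The translation nfs_bmap() does perform is just a scaling of the logical
 * block number by the mount's I/O size.  For a hypothetical f_iosize of 8192:
 *
 *	*ap->a_bnp = ap->a_bn * btodb(8192);	(scale factor 16)
 *
 * so logical block 3 maps to pseudo disk block 48, and the byte offset is
 * later recovered as b_blkno * DEV_BSIZE (48 * 512 = 24576).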
*/ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but nas not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. 
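 *
 * Each buffer gathered below also contributes its dirty byte range to one
 * covering commit.  As a hypothetical example, with DEV_BSIZE 512, a buffer
 * at b_blkno 16 dirty over [0, 8192) and one at b_blkno 32 dirty over
 * [0, 4096) give
 *
 *	off    = 16 * 512 + 0    = 8192
 *	endoff = 32 * 512 + 4096 = 20480
 *
 * so a single COMMIT of 12288 bytes at offset 8192 covers both, provided
 * all of the buffers share the same write credential.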
*/ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { vfs_unbusy_pages(bp); brelse(bp); } else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) { vfs_bio_need_satisfy(); } } s = splbio(); /* XXX check this positionning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. 
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bp->b_flags & B_BUSY) { if (waitfor != MNT_WAIT || passone) continue; bp->b_flags |= B_WANTED; error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); splx(s); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) bp->b_flags |= (B_BUSY|B_ASYNC); else bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (vp->v_dirtyblkhd.lh_first && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%lx", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. */ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. */ int nfs_writebp(bp, force) register struct buf *bp; int force; { int s; register int oldflags = bp->b_flags, retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) bp->b_flags |= B_INVAL | B_NOCACHE; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) vfs_bio_need_satisfy(); } s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. 
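 *
 * The commit covers only the buffer's dirty region:
 *
 *	off = b_blkno * DEV_BSIZE + b_dirtyoff;
 *	cnt = b_dirtyend - b_dirtyoff;
 *
 * If the COMMIT succeeds the buffer is marked clean and biodone()'d with
 * no data transfer; a stale write verifier forces nfs_clearcommit() and
 * the buffer falls through to VOP_STRATEGY() like any other write.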
*/ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, bp->b_proc); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(bp->b_vp->v_mount); } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; VOP_STRATEGY(bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp, bp->b_vp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; getmicrotime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; getmicrotime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. 
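 *
 * NACC and NUPD are set by the read and write wrappers above; if either
 * is set, the cached n_atim/n_mtim are pushed to the server via SETATTR,
 * but only when this is the last use of the vnode and the filesystem is
 * not mounted read-only.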
*/ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; getmicrotime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; getmicrotime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct timeval tv; struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { getmicrotime(&tv); if (np->n_flag & NACC) { np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; } if (np->n_flag & NUPD) { np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; } np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/nfsclient/nfs_vnops.c =================================================================== --- head/sys/nfsclient/nfs_vnops.c (revision 35822) +++ head/sys/nfsclient/nfs_vnops.c (revision 35823) @@ -1,3304 +1,3284 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.82 1998/03/28 12:04:40 bde Exp $ + * $Id: nfs_vnops.c,v 1.83 1998/03/30 09:54:32 phk Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
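 *
 * On kernels without the merged buffer cache the macro below simply
 * compiles the vfs_busy_pages() calls away.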
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) nfs_rename }, 
{ &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, int fnamelen, struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. 
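 *
 * The requested vnode bits map onto v3 ACCESS bits as follows: VREAD
 * becomes NFSV3ACCESS_READ; VWRITE becomes MODIFY plus EXTEND (plus
 * DELETE for a directory); VEXEC becomes EXECUTE, or LOOKUP for a
 * directory.  If the server grants less than was asked for, the access
 * check fails with EACCES.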
*/ static int nfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register u_long *tl; register caddr_t cp; register int t1, t2; caddr_t bpos, dpos, cp2; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long mode, rmode; int v3 = NFS_ISV3(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * For nfs v3, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about, but * this is better than just returning anything that is lying about * in the cache. */ if (v3) { nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type == VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } *tl = txdr_unsigned(mode); nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred); nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_long, *tl); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if ((rmode & mode) != mode) error = EACCES; } nfsm_reqdone; return (error); } else { if (error = nfsspec_access(ap)) return (error); /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = ap->a_p; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type = VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. 
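 *
 * For ordinary (non-NQNFS) mounts staleness is judged by comparing the
 * cached n_mtime against the server's modify time; a mismatch, or a
 * locally modified node, flushes the buffer cache with nfs_vinvalbuf()
 * before the open completes.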
*/ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { printf("open eacces vtyp=%d\n",vp->v_type); return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) { return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. 
*/ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_long *tl; register int t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) nfsm_loadattr(vp, ap->a_vap); nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (u_short)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) return (error); } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, (u_long)np->n_size); } return (error); } /* * Do an nfs setattr rpc. 
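 *
 * For v3 every field is preceded by a boolean discriminator, so a request
 * that changes only the mode marshals roughly as
 *
 *	TRUE,  mode
 *	FALSE			(uid unchanged)
 *	FALSE			(gid unchanged)
 *	FALSE			(size unchanged)
 *	DONTCHANGE		(atime)
 *	DONTCHANGE		(mtime)
 *	FALSE			(no guard ctime check)
 *
 * while v2 sends a fixed nfsv2_sattr with VNOVAL in the untouched fields.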
*/ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { if (vap->va_mode != (u_short)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_uid != (uid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_gid != (gid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_size != VNOVAL) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&vap->va_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != time_second) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != time_second) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (u_short)VNOVAL) sp->sa_mode = VNOVAL; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = VNOVAL; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = VNOVAL; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; register int flags = cnp->cn_flags; register struct vnode *newvp; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if (error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
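 * If this lookup is the final component of a rename target, the looked-up
 * vnode is handed back with SAVENAME set so the name survives for the
 * rename proper; a file handle matching the directory itself means an
 * attempt to rename onto "." and fails with EISDIR.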
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EPERM); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
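 *
 * A symlink target fits in a single reply (it is bounded by
 * NFS_MAXPATHLEN), so unlike the read and write RPCs below no chunking
 * loop is needed.  Those loops split transfers at nm_rsize/nm_wsize; as
 * a hypothetical example, a 100000 byte read against an rsize of 32768
 * becomes four READ RPCs of 32768, 32768, 32768 and 1696 bytes.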
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(&uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); txdr_hyper(&uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); } else { nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *++tl = txdr_unsigned(uiop->uio_offset); tl += 2; } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. 
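 *
 * For a character or block device the v2 request thus carries
 * sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode) and
 * sa_size = txdr_unsigned(vap->va_rdev), whereas v3 sends the real file
 * type followed by separate major and minor words.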
*/ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (EOPNOTSUPP); } if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); *tl++ = vtonfsv3_type(vap->va_type); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - vput(dvp); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
*/ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_long *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - vput(dvp); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
* - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vp->v_usecount < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_usecount == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); zfree(namei_zone, cnp->cn_pnbuf); np->n_attrstamp = 0; - vput(dvp); - if (vp == dvp) - vrele(vp); - else - vput(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(sp) register struct sillyrename *sp; { return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, (struct proc *)0)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 
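 *
 * The rpc carries only the directory filehandle and the name to unlink;
 * the directory's attribute cache is refreshed from the v3 wcc data, or
 * simply timed out via n_attrstamp when none comes back.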
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; char *fnameptr; int fnamelen; register struct vnode *tdvp; char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); - if (tdvp == vp) - vrele(tdvp); - else - vput(tdvp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; - vput(tdvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, cnp->cn_cred->cr_gid); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; - vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); - vput(dvp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); - vput(vp); - vput(dvp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
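 *
 * Resumption between rpcs relies on the opaque directory cookies: the
 * cookie of the last entry consumed (and, for v3, the server's cookie
 * verifier in n_cookieverf) is remembered via nfs_getcookie(), and a
 * missing cookie for the requested offset means the cached directory
 * block is stale, reported as NFSERR_BAD_COOKIE.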
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get 
them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { if (error = nfs_nget(vp->v_mount, fhp, fhsize, &np)) doit = 0; else newvp = NFSTOV(np); } } if (doit) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp * i; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { vrele(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } if (error = nfs_renameit(dvp, cnp, sp)) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. 
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) if (newvp == dvp) vrele(newvp); else vput(newvp); } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_long *tl; register int t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). 
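 *
 * Since no real device sits behind an NFS vnode, the translation done by
 * nfs_bmap() below is pure arithmetic on the very same vnode:
 *
 *	*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
 *
 * e.g. with an 8k f_iosize, logical block 3 becomes 3 * 16 = 48 DEV_BSIZE
 * blocks, and all real I/O is left to nfs_doio() and the read/write rpcs.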
*/ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just call nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers are waiting for a commit. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below.
*/ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { vfs_unbusy_pages(bp); brelse(bp); } else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) { vfs_bio_need_satisfy(); } } s = splbio(); /* XXX check this positionning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. 
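 *
 * This second phase pushes the remaining dirty buffers with VOP_BWRITE();
 * on the first pass of the commit case, buffers already marked
 * B_NEEDCOMMIT are skipped since the commit rpc above should cover them.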
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bp->b_flags & B_BUSY) { if (waitfor != MNT_WAIT || passone) continue; bp->b_flags |= B_WANTED; error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); splx(s); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) bp->b_flags |= (B_BUSY|B_ASYNC); else bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (vp->v_dirtyblkhd.lh_first && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%lx", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. */ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. */ int nfs_writebp(bp, force) register struct buf *bp; int force; { int s; register int oldflags = bp->b_flags, retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) bp->b_flags |= B_INVAL | B_NOCACHE; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) vfs_bio_need_satisfy(); } s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. 
*/ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, bp->b_proc); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(bp->b_vp->v_mount); } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; VOP_STRATEGY(bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp, bp->b_vp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; getmicrotime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; getmicrotime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. 
*/ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; getmicrotime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; getmicrotime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct timeval tv; struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { getmicrotime(&tv); if (np->n_flag & NACC) { np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; } if (np->n_flag & NUPD) { np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; } np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/nfsserver/nfs_serv.c =================================================================== --- head/sys/nfsserver/nfs_serv.c (revision 35822) +++ head/sys/nfsserver/nfs_serv.c (revision 35823) @@ -1,3462 +1,3473 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_serv.c,v 1.58 1998/02/09 06:10:35 eivind Exp $ + * $Id: nfs_serv.c,v 1.59 1998/03/30 09:53:56 phk Exp $ */ /* * nfs version 2 and 3 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) * 3 - build the rpc reply in an mbuf list * nb: * - do not mix the phases, since the nfsm_?? macros can return failures * on a bad rpc or similar and do not do any vrele() or vput()'s * * - the nfsm_reply() macro generates an nfs rpc reply with the nfs * error number iff error != 0 whereas * returning an error from the server function implies a fatal error * such as a badly constructed rpc request that should be dropped without * a reply. * For Version 3, nfsm_reply() does not return for the error case, since * most version 3 rpcs return more than the status for error cases. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; #ifndef NFS_NOSERVER nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; /* Global vars */ extern u_long nfs_xdrneg1; extern u_long nfs_false, nfs_true; extern enum vtype nv3tov_type[8]; extern struct nfsstats nfsstats; int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000; int nfsrvw_procrastinate_v3 = 0; static int nfs_async; SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, struct proc *)); static void nfsrvw_coalesce __P((struct nfsrv_descript *, struct nfsrv_descript *)); /* * nfs v3 access service */ int nfsrv3_access(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret; char *cp2; struct mbuf *mb, *mreq, *mb2; struct vattr vattr, *vap = &vattr; u_long testmode, nfsmode; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } nfsmode = fxdr_unsigned(u_long, *tl); if ((nfsmode & NFSV3ACCESS_READ) && nfsrv_access(vp, VREAD, cred, rdonly, procp)) nfsmode &= ~NFSV3ACCESS_READ; if (vp->v_type == VDIR) testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); else testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if ((nfsmode & testmode) && nfsrv_access(vp, VWRITE, cred, rdonly, procp)) nfsmode &= ~testmode; if (vp->v_type == VDIR) testmode = NFSV3ACCESS_LOOKUP; else testmode = NFSV3ACCESS_EXECUTE; if ((nfsmode & testmode) && nfsrv_access(vp, VEXEC, cred, rdonly, procp)) nfsmode &= ~testmode; getret = VOP_GETATTR(vp, vap, cred, procp); vput(vp); nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, vap); nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); nfsm_srvdone; } /* * nfs getattr service */ int nfsrv_getattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va; register struct vattr *vap = &va; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(0); return (0); } nqsrv_getl(vp, ND_READ); error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); if (error) return (0); nfsm_build(fp, 
struct nfs_fattr *, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); nfsm_srvfillattr(vap, fp); nfsm_srvdone; } /* * nfs setattr service */ int nfsrv_setattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, preat; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register struct nfs_fattr *fp; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; struct timespec guard; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Nah nah nah nah na nah * There is a bug in the Sun client that puts 0xffff in the mode * field of sattr when it should put in 0xffffffff. The u_short * doesn't sign extend. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) vap->va_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != nfs_xdrneg1) vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != nfs_xdrneg1) vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != nfs_xdrneg1) vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); #else vap->va_atime.tv_sec = fxdr_unsigned(long, sp->sa_atime.nfsv2_sec); vap->va_atime.tv_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); } /* * Now that we have all the fields, lets do it. */ if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } nqsrv_getl(vp, ND_WRITE); if (v3) { error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); if (!error && gcheck && (preat.va_ctime.tv_sec != guard.tv_sec || preat.va_ctime.tv_nsec != guard.tv_nsec)) error = NFSERR_NOT_SYNC; if (error) { vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } } /* * If the size is being changed write acces is required, otherwise * just check for a read only file system. 
*/ if (vap->va_size == ((u_quad_t)((quad_t) -1))) { if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto out; } } else { if (vp->v_type == VDIR) { error = EISDIR; goto out; } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly, procp)) goto out; } error = VOP_SETATTR(vp, vap, cred, procp); postat_ret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = postat_ret; out: vput(vp); nfsm_reply(NFSX_WCCORFATTR(v3)); if (v3) { nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs lookup rpc */ int nfsrv_lookup(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct nameidata nd, ind, *ndp = &nd; struct vnode *vp, *dirp; nfsfh_t nfh; fhandle_t *fhp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirattr_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), pubflag; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vattr va, dirattr, *vap = &va; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); pubflag = nfs_ispublicfh(fhp); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), pubflag); if (!error && pubflag) { if (nd.ni_vp->v_type == VDIR && nfs_pub.np_index != NULL) { /* * Setup call to lookup() to see if we can find * the index file. Arguably, this doesn't belong * in a kernel.. Ugh. */ ind = nd; VOP_UNLOCK(nd.ni_vp, 0, procp); ind.ni_pathlen = strlen(nfs_pub.np_index); ind.ni_cnd.cn_nameptr = ind.ni_cnd.cn_pnbuf = nfs_pub.np_index; ind.ni_startdir = nd.ni_vp; VREF(ind.ni_startdir); error = lookup(&ind); if (!error) { /* * Found an index file. Get rid of * the old references. */ if (dirp) vrele(dirp); dirp = nd.ni_vp; vrele(nd.ni_startdir); ndp = &ind; } else error = 0; } /* * If the public filehandle was used, check that this lookup * didn't result in a filehandle outside the publicly exported * filesystem. 
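 *
 * Anything that resolved onto a mount other than nfs_pub.np_mount is
 * rejected with EPERM, so a public-handle lookup cannot be used to wander
 * out of the exported tree.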
*/ if (!error && ndp->ni_vp->v_mount != nfs_pub.np_mount) { vput(nd.ni_vp); error = EPERM; } } if (dirp) { if (v3) dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, procp); vrele(dirp); } if (error) { nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nqsrv_getl(ndp->ni_startdir, ND_READ); vrele(ndp->ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); vp = ndp->ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); if (error) { nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nfsm_srvfhtom(fhp, v3); if (v3) { nfsm_srvpostop_attr(0, vap); nfsm_srvpostop_attr(dirattr_ret, &dirattr); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs readlink service */ int nfsrv_readlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; register struct iovec *ivp = iv; register struct mbuf *mp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, i, tlen, len, getret; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; struct vnode *vp; struct vattr attr; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; u_quad_t frev; #ifndef nolint mp2 = mp3 = (struct mbuf *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); len = 0; i = 0; while (len < NFS_MAXPATHLEN) { MGET(mp, M_WAIT, MT_DATA); MCLGET(mp, M_WAIT); mp->m_len = NFSMSIZ(mp); if (len == 0) mp3 = mp2 = mp; else { mp2->m_next = mp; mp2 = mp; } if ((len+mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = NFS_MAXPATHLEN-len; len = NFS_MAXPATHLEN; } else len += mp->m_len; ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { m_freem(mp3); nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VLNK) { if (v3) error = EINVAL; else error = ENXIO; goto out; } nqsrv_getl(vp, ND_READ); error = VOP_READLINK(vp, uiop, cred); out: getret = VOP_GETATTR(vp, &attr, cred, procp); vput(vp); if (error) m_freem(mp3); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &attr); if (error) return (0); } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = nfsm_rndup(len); nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); mb->m_next = mp3; nfsm_srvdone; } /* * nfs read service */ int nfsrv_read(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *iv; struct iovec *iv2; register struct mbuf *m; register struct nfs_fattr *fp; register u_long *tl; 
register long t1; register int i; caddr_t bpos; int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; char *cp2; struct mbuf *mb, *mb2, *mreq; struct mbuf *m2; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; struct vattr va, *vap = &va; off_t off; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *tl); } nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_READ); if (error = nfsrv_access(vp, VREAD, cred, rdonly, procp)) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } getret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = getret; if (error) { vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } if (off >= vap->va_size) cnt = 0; else if ((off + reqlen) > vap->va_size) cnt = nfsm_rndup(vap->va_size - off); else cnt = reqlen; nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); if (v3) { nfsm_build(tl, u_long *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; tl += (NFSX_V3FATTR / sizeof (u_long)); } else { nfsm_build(tl, u_long *, NFSX_V2FATTR + NFSX_UNSIGNED); fp = (struct nfs_fattr *)tl; tl += (NFSX_V2FATTR / sizeof (u_long)); } len = left = cnt; if (cnt > 0) { /* * Generate the mbuf list with the uio_iov ref. to it. 
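 * Roughly: a first pass allocates enough mbuf clusters to hold the read and
 * counts the iovecs required, then a second pass points each iovec at the
 * unused space of its mbuf so VOP_READ() can fill the reply data in place.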
*/ i = 0; m = m2 = mb; while (left > 0) { siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { left -= siz; i++; } if (left > 0) { MGET(m, M_WAIT, MT_DATA); MCLGET(m, M_WAIT); m->m_len = 0; m2->m_next = m; m2 = m; } } MALLOC(iv, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = mb; left = cnt; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = cnt; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); off = uiop->uio_offset; FREE((caddr_t)iv2, M_TEMP); if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { if (!error) error = getret; m_freem(mreq); vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } } else uiop->uio_resid = 0; vput(vp); nfsm_srvfillattr(vap, fp); len -= uiop->uio_resid; tlen = nfsm_rndup(len); if (cnt != tlen || tlen != len) nfsm_adj(mb, cnt - tlen, tlen - len); if (v3) { *tl++ = txdr_unsigned(len); if (len < reqlen) *tl++ = nfs_true; else *tl++ = nfs_false; } *tl = txdr_unsigned(len); nfsm_srvdone; } /* * nfs write service */ int nfsrv_write(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *ivp; register int i, cnt; register struct mbuf *mp; register struct nfs_fattr *fp; struct iovec *iv; struct vattr va, forat; register struct vattr *vap = &va; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, retlen, zeroing, adjust; int stable = NFSV3WRITE_FILESYNC; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; off_t off; u_quad_t frev; if (mrep == NULL) { *mrq = NULL; return (0); } fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); tl += 3; stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) stable = NFSV3WRITE_UNSTABLE; } retlen = len = fxdr_unsigned(long, *tl); cnt = i = 0; /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. 
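 * (A zero length request still has its file handle, file type and write
 * permission checked below; it just never issues a VOP_WRITE().)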
*/ if (len > 0) { zeroing = 1; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else if (mp->m_len > 0) { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } if (mp->m_len > 0) cnt++; } mp = mp->m_next; } } if (len > NFS_MAXDATA || len < 0 || i < len) { error = EIO; nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (error) { vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (len > 0) { MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } /* * XXX * The IO_METASYNC flag indicates that all metadata (and not just * enough to ensure data integrity) mus be written to stable storage * synchronously. * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) */ if (stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_resid = len; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = off; error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; FREE((caddr_t)iv, M_TEMP); } aftat_ret = VOP_GETATTR(vp, vap, cred, procp); vput(vp); if (!error) error = aftat_ret; nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); if (error) return (0); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); /* * If nfs_async is set, then pretend the write was FILESYNC. */ if (stable == NFSV3WRITE_UNSTABLE && !nfs_async) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * NFS write service with write gathering support. Called when * nfsrvw_procrastinate > 0. * See: Chet Juszczak, "Improving the Write Performance of an NFS Server", * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco, * Jan. 1994. 
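 * Roughly: each request is held on a per-socket time queue for up to
 * nfsrvw_procrastinate (nfsrvw_procrastinate_v3 for V3) and hashed on its
 * file handle, so that contiguous writes arriving within that window can be
 * coalesced into a single, larger VOP_WRITE() below.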
*/ int nfsrv_writegather(ndp, slp, procp, mrq) struct nfsrv_descript **ndp; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { register struct iovec *ivp; register struct mbuf *mp; register struct nfsrv_descript *wp, *nfsd, *owp, *swp; register struct nfs_fattr *fp; register int i; struct iovec *iov; struct nfsrvw_delayhash *wpp; struct ucred *cred; struct vattr va, forat; register u_long *tl; register long t1; caddr_t bpos, dpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; char *cp2; struct mbuf *mb, *mb2, *mreq, *mrep, *md; struct vnode *vp; struct uio io, *uiop = &io; u_quad_t frev, cur_usec; #ifndef nolint i = 0; len = 0; #endif *mrq = NULL; if (*ndp) { nfsd = *ndp; *ndp = NULL; mrep = nfsd->nd_mrep; md = nfsd->nd_md; dpos = nfsd->nd_dpos; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); LIST_INIT(&nfsd->nd_coalesce); nfsd->nd_mreq = NULL; nfsd->nd_stable = NFSV3WRITE_FILESYNC; cur_usec = nfs_curusec(); nfsd->nd_time = cur_usec + (v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate); /* * Now, get the write header.. */ nfsm_srvmtofh(&nfsd->nd_fh); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &nfsd->nd_off); tl += 3; nfsd->nd_stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); nfsd->nd_off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) nfsd->nd_stable = NFSV3WRITE_UNSTABLE; } len = fxdr_unsigned(long, *tl); nfsd->nd_len = len; nfsd->nd_eoff = nfsd->nd_off + len; /* * Trim the header out of the mbuf list and trim off any trailing * junk so that the mbuf list has only the write data. */ zeroing = 1; i = 0; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } } mp = mp->m_next; } if (len > NFS_MAXDATA || len < 0 || i < len) { nfsmout: m_freem(mrep); error = EIO; nfsm_writereply(2 * NFSX_UNSIGNED, v3); if (v3) nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsd->nd_mreq = mreq; nfsd->nd_mrep = NULL; nfsd->nd_time = 0; } /* * Add this entry to the hash and time queues. */ s = splsoftclock(); owp = NULL; wp = slp->ns_tq.lh_first; while (wp && wp->nd_time < nfsd->nd_time) { owp = wp; wp = wp->nd_tq.le_next; } NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff)); if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_tq); } else { LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } if (nfsd->nd_mrep) { wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); owp = NULL; wp = wpp->lh_first; while (wp && bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } while (wp && wp->nd_off < nfsd->nd_off && !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_hash); /* * Search the hash list for overlapping entries and * coalesce. */ for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) { wp = nfsd->nd_hash.le_next; if (NFSW_SAMECRED(owp, nfsd)) nfsrvw_coalesce(owp, nfsd); } } else { LIST_INSERT_HEAD(wpp, nfsd, nd_hash); } } splx(s); } /* * Now, do VOP_WRITE()s for any one(s) that need to be done now * and generate the associated reply mbuf list(s). 
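 * Entries whose nd_time has expired are pulled off the queues, written with
 * one VOP_WRITE() each, and a reply is then built for the request and for
 * everything that was coalesced into it.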
*/ loop1: cur_usec = nfs_curusec(); s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { owp = nfsd->nd_tq.le_next; if (nfsd->nd_time > cur_usec) break; if (nfsd->nd_mreq) continue; NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_tq); LIST_REMOVE(nfsd, nd_hash); splx(s); mrep = nfsd->nd_mrep; nfsd->nd_mrep = NULL; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); forat_ret = aftat_ret = 1; error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE); if (!error) { if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } } else vp = NULL; if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = nfsd->nd_off; uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; if (uiop->uio_resid > 0) { mp = mrep; i = 0; while (mp) { if (mp->m_len > 0) i++; mp = mp->m_next; } uiop->uio_iovcnt = i; MALLOC(iov, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = ivp = iov; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } if (!error) { error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; } FREE((caddr_t)iov, M_TEMP); } m_freem(mrep); if (vp) { aftat_ret = VOP_GETATTR(vp, &va, cred, procp); vput(vp); } /* * Loop around generating replies for all write rpcs that have * now been completed. */ swp = nfsd; do { NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff)); if (error) { nfsm_writereply(NFSX_WCCDATA(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); } } else { nfsm_writereply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsd->nd_len); *tl++ = txdr_unsigned(swp->nd_stable); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(&va, fp); } } nfsd->nd_mreq = mreq; if (nfsd->nd_mrep) panic("nfsrv_write: nd_mrep not free"); /* * Done. Put it at the head of the timer queue so that * the final phase can return the reply. */ s = splsoftclock(); if (nfsd != swp) { nfsd->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } nfsd = swp->nd_coalesce.lh_first; if (nfsd) { LIST_REMOVE(nfsd, nd_tq); } splx(s); } while (nfsd); s = splsoftclock(); swp->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); splx(s); goto loop1; } splx(s); /* * Search for a reply to return. */ s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) if (nfsd->nd_mreq) { NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_tq); *mrq = nfsd->nd_mreq; *ndp = nfsd; break; } splx(s); return (0); } /* * Coalesce the write request nfsd into owp. 
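 * (Only requests with the same credentials and a contiguous byte range on
 * the same file handle are merged; see the NFSW_SAMECRED/NFSW_CONTIG checks
 * in the caller.)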
To do this we must: * - remove nfsd from the queues * - merge nfsd->nd_mrep into owp->nd_mrep * - update the nd_eoff and nd_stable for owp * - put nfsd on owp's nd_coalesce list * NB: Must be called at splsoftclock(). */ static void nfsrvw_coalesce(owp, nfsd) register struct nfsrv_descript *owp; register struct nfsrv_descript *nfsd; { register int overlap; register struct mbuf *mp; struct nfsrv_descript *p; NFS_DPF(WG, ("C%03x-%03x", nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff)); LIST_REMOVE(nfsd, nd_hash); LIST_REMOVE(nfsd, nd_tq); if (owp->nd_eoff < nfsd->nd_eoff) { overlap = owp->nd_eoff - nfsd->nd_off; if (overlap < 0) panic("nfsrv_coalesce: bad off"); if (overlap > 0) m_adj(nfsd->nd_mrep, overlap); mp = owp->nd_mrep; while (mp->m_next) mp = mp->m_next; mp->m_next = nfsd->nd_mrep; owp->nd_eoff = nfsd->nd_eoff; } else m_freem(nfsd->nd_mrep); nfsd->nd_mrep = NULL; if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) owp->nd_stable = NFSV3WRITE_FILESYNC; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC && owp->nd_stable == NFSV3WRITE_UNSTABLE) owp->nd_stable = NFSV3WRITE_DATASYNC; LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq); /* * If nfsd had anything else coalesced into it, transfer them * to owp, otherwise their replies will never get sent. */ for (p = nfsd->nd_coalesce.lh_first; p; p = nfsd->nd_coalesce.lh_first) { LIST_REMOVE(p, nd_tq); LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq); } } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. */ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } /* * nfs create service * now does a truncate to 0 length via. 
setattr if it already exists */ int nfsrv_create(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register u_long *tl; struct nameidata nd; register caddr_t cp; register long t1; caddr_t bpos; int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev, tempsize; u_char cverf[NFSX_V3CREATEVERF]; #ifndef nolint rdev = 0; #endif nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSV3CREATE_GUARDED: if (nd.ni_vp) { error = EEXIST; break; } case NFSV3CREATE_UNCHECKED: nfsm_srvsattr(vap); break; case NFSV3CREATE_EXCLUSIVE: nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); bcopy(cp, cverf, NFSX_V3CREATEVERF); exclusive_flag = 1; if (nd.ni_vp == NULL) vap->va_mode = 0; break; }; vap->va_type = VREG; } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); if (vap->va_type == VNON) vap->va_type = VREG; vap->va_mode = nfstov_mode(sp->sa_mode); switch (vap->va_type) { case VREG: tsize = fxdr_unsigned(long, sp->sa_size); if (tsize != -1) vap->va_size = (u_quad_t)tsize; break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(long, sp->sa_size); break; }; } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ?? 
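 * (For an existing file the requested size, normally 0, is applied below via
 * VOP_SETATTR() after a write access check.)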
*/ if (nd.ni_vp == NULL) { if (vap->va_type == VREG || vap->va_type == VSOCK) { vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); if (!error) { nfsrv_object_create(nd.ni_vp); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); if (exclusive_flag) { exclusive_flag = 0; VATTR_NULL(vap); bcopy(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF); error = VOP_SETATTR(nd.ni_vp, vap, cred, procp); } } } else if (vap->va_type == VCHR || vap->va_type == VBLK || vap->va_type == VFIFO) { if (vap->va_type == VCHR && rdev == 0xffffffff) vap->va_type = VFIFO; if (vap->va_type != VFIFO && (error = suser(cred, (u_short *)0))) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); nfsm_reply(0); return (error); } else vap->va_rdev = (dev_t)rdev; nqsrv_getl(nd.ni_dvp, ND_WRITE); - if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); + if (error) { vrele(nd.ni_startdir); nfsm_reply(0); } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; if (error = lookup(&nd)) { zfree(namei_zone, nd.ni_cnd.cn_pnbuf); nfsm_reply(0); } nfsrv_object_create(nd.ni_vp); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); if (nd.ni_cnd.cn_flags & ISSYMLINK) { vrele(nd.ni_dvp); vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; nfsm_reply(0); } } else { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); error = ENXIO; } vp = nd.ni_vp; } else { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); vp = nd.ni_vp; if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (vap->va_size != -1) { error = nfsrv_access(vp, VWRITE, cred, (nd.ni_cnd.cn_flags & RDONLY), procp); if (!error) { nqsrv_getl(vp, ND_WRITE); tempsize = vap->va_size; VATTR_NULL(vap); vap->va_size = tempsize; error = VOP_SETATTR(vp, vap, cred, procp); } if (error) vput(vp); } } if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); } if (v3) { if (exclusive_flag && !error && bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) error = EEXIST; diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) vrele(dirp); if (nd.ni_cnd.cn_nameiop) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vput(nd.ni_vp); return (error); } /* * nfs v3 mknod service */ int nfsrv_mknod(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register u_long *tl; struct 
nameidata nd; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; u_long major, minor; enum vtype vtyp; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); if (error) { nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); return (0); } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vtyp = nfsv3tov_type(*tl); if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); error = NFSERR_BADTYPE; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); goto out; } VATTR_NULL(vap); nfsm_srvsattr(vap); if (vtyp == VCHR || vtyp == VBLK) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_long, *tl++); minor = fxdr_unsigned(u_long, *tl); vap->va_rdev = makedev(major, minor); } /* * Iff doesn't exist, create it. */ if (nd.ni_vp) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); error = EEXIST; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); goto out; } vap->va_type = vtyp; if (vtyp == VSOCK) { vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); if (!error) zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } else { if (vtyp != VFIFO && (error = suser(cred, (u_short *)0))) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); - if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); + if (error) { vrele(nd.ni_startdir); goto out; } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = procp->p_ucred; error = lookup(&nd); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); if (error) goto out; if (nd.ni_cnd.cn_flags & ISSYMLINK) { vrele(nd.ni_dvp); vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; } } out: vp = nd.ni_vp; if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); } diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); nfsmout: if (dirp) vrele(dirp); if (nd.ni_cnd.cn_nameiop) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vput(nd.ni_vp); return (error); } /* * nfs remove service */ int nfsrv_remove(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = 
nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint vp = (struct vnode *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else vrele(dirp); } if (!error) { vp = nd.ni_vp; if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) { error = EBUSY; goto out; } out: if (!error) { nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); } + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); } if (dirp && v3) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs rename service */ int nfsrv_rename(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct nameidata fromnd, tond; struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; struct vnode *tdirp = (struct vnode *)0; struct vattr fdirfor, fdiraft, tdirfor, tdiraft; nfsfh_t fnfh, tnfh; fhandle_t *ffhp, *tfhp; u_quad_t frev; uid_t saved_uid; #ifndef nolint fvp = (struct vnode *)0; #endif ffhp = &fnfh.fh_generic; tfhp = &tnfh.fh_generic; fromnd.ni_cnd.cn_nameiop = 0; tond.ni_cnd.cn_nameiop = 0; nfsm_srvmtofh(ffhp); nfsm_srvnamesiz(len); /* * Remember our original uid so that we can reset cr_uid before * the second nfs_namei() call, in case it is remapped.
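 * (The export's credential mapping, e.g. root being mapped to the anonymous
 * uid, can rewrite cr_uid during the first lookup.)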
*/ saved_uid = cred->cr_uid; fromnd.ni_cnd.cn_cred = cred; fromnd.ni_cnd.cn_nameiop = DELETE; fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (fdirp) { if (v3) fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, procp); else { vrele(fdirp); fdirp = (struct vnode *)0; } } if (error) { nfsm_reply(2 * NFSX_WCCDATA(v3)); nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); if (fdirp) vrele(fdirp); return (0); } fvp = fromnd.ni_vp; nfsm_srvmtofh(tfhp); nfsm_strsiz(len2, NFS_MAXNAMLEN); cred->cr_uid = saved_uid; tond.ni_cnd.cn_cred = cred; tond.ni_cnd.cn_nameiop = RENAME; tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (tdirp) { if (v3) tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, procp); else { vrele(tdirp); tdirp = (struct vnode *)0; } } if (error) { VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { if (v3) error = EEXIST; else error = EISDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { if (v3) error = EEXIST; else error = ENOTDIR; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp->v_mount != tdvp->v_mount) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp == tdvp) if (v3) error = EINVAL; else error = ENOTEMPTY; /* * If source is the same as the destination (that is the * same vnode with the same name in the same directory), * then there is nothing to do. 
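 * (The error = -1 set below is only an internal marker for this case; it is
 * converted back to success after the normal cleanup in the error path.)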
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { nqsrv_getl(fromnd.ni_dvp, ND_WRITE); nqsrv_getl(tdvp, ND_WRITE); if (tvp) { nqsrv_getl(tvp, ND_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); if (error == -1) error = 0; } vrele(tond.ni_startdir); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fdirp) { fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); vrele(fdirp); } if (tdirp) { tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); vrele(tdirp); } vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } return (0); nfsmout: if (fdirp) vrele(fdirp); if (tdirp) vrele(tdirp); if (tond.ni_cnd.cn_nameiop) { vrele(tond.ni_startdir); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); } if (fromnd.ni_cnd.cn_nameiop) { vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } return (error); } /* * nfs link service */ int nfsrv_link(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *xp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft, at; nfsfh_t nfh, dnfh; fhandle_t *fhp, *dfhp; u_quad_t frev; fhp = &nfh.fh_generic; dfhp = &dnfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } if (vp->v_type == VDIR) { error = EPERM; /* POSIX */ goto out1; } nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out1; xp = nd.ni_vp; if (xp != NULL) { error = EEXIST; goto out; } xp = nd.ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; out: if (!error) { nqsrv_getl(vp, ND_WRITE); nqsrv_getl(xp, ND_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + vput(nd.ni_dvp); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); } out1: if (v3) getret = VOP_GETATTR(vp, &at, cred, procp); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } vrele(vp); 
nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs symbolic link service */ int nfsrv_symlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; struct nameidata nd; register struct vattr *vap = &va; register u_long *tl; register long t1; struct nfsv2_sattr *sp; char *bpos, *pathcp = (char *)0, *cp2; struct uio io; struct iovec iv; int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); struct mbuf *mb, *mreq, *mb2; struct vnode *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out; VATTR_NULL(vap); if (v3) nfsm_srvsattr(vap); nfsm_strsiz(len2, NFS_MAXPATHLEN); MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); iv.iov_base = pathcp; iv.iov_len = len2; io.uio_resid = len2; io.uio_offset = 0; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; nfsm_mtouio(&io, len2); if (!v3) { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode); } *(pathcp + len2) = '\0'; if (nd.ni_vp) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); + vput(nd.ni_dvp); if (error) vrele(nd.ni_startdir); else { if (v3) { nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART | FOLLOW); nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; error = lookup(&nd); if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(nd.ni_vp, vap, cred, procp); vput(nd.ni_vp); } } else vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } out: if (pathcp) FREE(pathcp, M_TEMP); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } return (0); nfsmout: if (nd.ni_cnd.cn_nameiop) { vrele(nd.ni_startdir); zfree(namei_zone, nd.ni_cnd.cn_pnbuf); } if (dirp) vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); if (pathcp) FREE(pathcp, M_TEMP); return (error); } /* * nfs mkdir service */ int nfsrv_mkdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct 
nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfs_fattr *fp; struct nameidata nd; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vap->va_mode = nfstov_mode(*tl++); } vap->va_type = VDIR; vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); + vput(nd.ni_dvp); if (!error) { vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); vput(vp); } out: if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (error); } /* * nfs rmdir service */ int nfsrv_rmdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; struct nameidata nd; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH), FALSE); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if 
(dirp) vrele(dirp); return (0); } vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vput(vp); } + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (vp != NULLVP) + vput(vp); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The nfsm_clget macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it only knows that it has encountered eof when the VOP_READDIR() * reads nothing * - as such one readdir rpc will return eof false although you are there * and then the next will return eof * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * NB: It is tempting to set eof to true if the VOP_READDIR() reads less * than requested, but this may not apply to all filesystems. For * example, client NFS does not { although it is never remote mounted * anyhow } * The alternate call nfsrv_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. 
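 * Roughly, each V2 entry in the reply is laid out as (illustrative only):
 *	entry_follows, fileid, name length, name padded to a long boundary, cookie
 * and the list is terminated by a false entry_follows word plus the eof flag;
 * the V3 variant widens the fileid and cookie to 64 bits and prefixes the
 * list with the directory's cookie verifier.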
*/ struct flrep { nfsuint64 fl_off; u_long fl_postopok; u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; u_long fl_fhok; u_long fl_fhsize; u_long fl_nfh[NFSX_V3FH / sizeof (u_long)]; }; int nfsrv_readdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct uio io; struct iovec iv; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies; int v3 = (nfsd->nd_flag & ND_NFSV3); u_quad_t frev, off, toff, verf; u_long *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); toff = fxdr_unsigned(u_quad_t, *tl++); } off = toff; cnt = fxdr_unsigned(int, *tl); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } nqsrv_getl(vp, ND_READ); if (v3) { error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; } if (!error) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); if (error) { vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp, 0, procp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (off_t)io.uio_offset; if (!cookies && !error) error = NFSERR_PERM; if (v3) { getret = VOP_GETATTR(vp, &at, cred, procp); if (!error) error = getret; } VOP_UNLOCK(vp, 0, procp); if (error) { vrele(vp); free((caddr_t)rbuf, M_TEMP); if (cookies) free((caddr_t)cookies, M_TEMP); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vrele(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + 2 * NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; } else nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. 
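 * (e.g. the block held only entries with d_fileno == 0 or cookies at or
 * before the requested offset)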
* If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that preceed the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); } mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; len += (4 * NFSX_UNSIGNED + nlen + rem); if (v3) len += 2 * NFSX_UNSIGNED; if (len > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; if (v3) { nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; } nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp+xfer) > be) tsiz = be-bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; nfsm_clget; /* Finish off the record */ if (v3) { *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; } *tl = txdr_unsigned(*cookiep); bp += NFSX_UNSIGNED; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); nfsm_srvdone; } int nfsrv_readdirplus(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp, *nvp; struct flrep fl; nfsfh_t nfh; fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; struct uio io; struct iovec iv; struct vattr va, at, *vap = &va; struct nfs_fattr *fp; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies; u_quad_t frev, off, toff, verf; u_long *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); off = toff; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > 
xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; if (!error) { nqsrv_getl(vp, ND_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } if (error) { vput(vp); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp, 0, procp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp); if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); off = (u_quad_t)io.uio_offset; getret = VOP_GETATTR(vp, &at, cred, procp); VOP_UNLOCK(vp, 0, procp); if (!cookies && !error) error = NFSERR_PERM; if (!error) error = getret; if (error) { vrele(vp); if (cookies) free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { vrele(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that preceed the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } /* * Probe one of the directory entries to see if the filesystem * supports VGET. */ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) { error = NFSERR_NOTSUPP; vrele(vp); free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } vput(nvp); dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; nfsm_reply(cnt); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; /* * For readdir_and_lookup get the vnode using * the file number. 
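 * Entries whose vnode, file handle or attributes cannot be obtained are
 * simply skipped (the invalid: label below) rather than failing the reply.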
*/ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp)) goto invalid; bzero((caddr_t)nfhp, NFSX_V3FH); nfhp->fh_fsid = nvp->v_mount->mnt_stat.f_fsid; if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { vput(nvp); goto invalid; } if (VOP_GETATTR(nvp, vap, cred, procp)) { vput(nvp); goto invalid; } vput(nvp); /* * If either the dircount or maxcount will be * exceeded, get out now. Both of these lengths * are calculated conservatively, including all * XDR overheads. */ len += (7 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + NFSX_V3POSTOPATTR); dirlen += (6 * NFSX_UNSIGNED + nlen + rem); if (len > cnt || dirlen > fullsiz) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry. */ fp = (struct nfs_fattr *)&fl.fl_fattr; nfsm_srvfillattr(vap, fp); fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; fl.fl_off.nfsuquad[0] = 0; fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; /* * Now copy the flrep structure out. */ xfer = sizeof (struct flrep); cp = (caddr_t)&fl; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); nfsm_srvdone; } /* * nfs commit service */ int nfsrv_commit(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr bfor, aft; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev, off; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). 
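 * The whole file is flushed instead: dirty VM pages are cleaned first and
 * VOP_FSYNC() is then called with MNT_WAIT.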
*/ fxdr_hyper(tl, &off); tl += 2; cnt = fxdr_unsigned(int, *tl); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); return (0); } for_ret = VOP_GETATTR(vp, &bfor, cred, procp); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { vm_object_page_clean(vp->v_object, 0, 0, TRUE); } error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); aft_ret = VOP_GETATTR(vp, &aft, cred, procp); vput(vp); nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); if (!error) { nfsm_build(tl, u_long *, NFSX_V3WRITEVERF); *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else return (0); nfsm_srvdone; } /* * nfs statfs service */ int nfsrv_statfs(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct statfs *sf; register struct nfs_statfs *sfp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct statfs statfs; u_quad_t frev, tval; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } sf = &statfs; error = VFS_STATFS(vp->v_mount, sf, procp); getret = VOP_GETATTR(vp, &at, cred, procp); vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); if (v3) { tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_tbytes); tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_fbytes); tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_abytes); sfp->sf_tfiles.nfsuquad[0] = 0; sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); sfp->sf_ffiles.nfsuquad[0] = 0; sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_afiles.nfsuquad[0] = 0; sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_invarsec = 0; } else { sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); sfp->sf_bsize = txdr_unsigned(sf->f_bsize); sfp->sf_blocks = txdr_unsigned(sf->f_blocks); sfp->sf_bfree = txdr_unsigned(sf->f_bfree); sfp->sf_bavail = txdr_unsigned(sf->f_bavail); } nfsm_srvdone; } /* * nfs fsinfo service */ int nfsrv_fsinfo(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_fsinfo *sip; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, pref; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, 
(nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } getret = VOP_GETATTR(vp, &at, cred, procp); vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); nfsm_srvpostop_attr(getret, &at); nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (slp->ns_so->so_type == SOCK_DGRAM) pref = NFS_MAXDGRAMDATA; else pref = NFS_MAXDATA; sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_rtpref = txdr_unsigned(pref); sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_wtpref = txdr_unsigned(pref); sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_dtpref = txdr_unsigned(pref); sip->fs_maxfilesize.nfsuquad[0] = 0xffffffff; sip->fs_maxfilesize.nfsuquad[1] = 0xffffffff; sip->fs_timedelta.nfsv3_sec = 0; sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1); sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); nfsm_srvdone; } /* * nfs pathconf service */ int nfsrv_pathconf(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct sockaddr *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_pathconf *pc; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, linkmax, namemax; int chownres, notrunc; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE)) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); if (!error) error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); if (!error) error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); if (!error) error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc); getret = VOP_GETATTR(vp, &at, cred, procp); vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. 
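 * That is, the reply hardwires case-sensitive, case-preserving name
 * handling, since VOP_PATHCONF() has no query for either property; the
 * four fields above come straight from the VOP_PATHCONF() calls.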
*/ pc->pc_caseinsensitive = nfs_false; pc->pc_casepreserving = nfs_true; nfsm_srvdone; } /* * Null operation, used by clients to ping server */ /* ARGSUSED */ int nfsrv_null(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error = NFSERR_RETVOID, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif nfsm_reply(0); return (0); } /* * No operation, used for obsolete procedures */ /* ARGSUSED */ int nfsrv_noop(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif if (nfsd->nd_repstat) error = nfsd->nd_repstat; else error = EPROCUNAVAIL; nfsm_reply(0); return (0); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESS() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case * 2 - The owner is to be given access irrespective of mode bits so that * processes that chmod after opening a file don't break. I don't like * this because it opens a security hole, but since the nfs server opens * a security hole the size of a barn door anyhow, what the heck. */ static int nfsrv_access(vp, flags, cred, rdonly, p) register struct vnode *vp; int flags; register struct ucred *cred; int rdonly; struct proc *p; { struct vattr vattr; int error; if (flags & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If there's shared text associated with * the inode, we can't allow writing. */ if (vp->v_flag & VTEXT) return (ETXTBSY); } if (error = VOP_GETATTR(vp, &vattr, cred, p)) return (error); if ((error = VOP_ACCESS(vp, flags, cred, p)) && cred->cr_uid != vattr.va_uid) return (error); return (0); } #endif /* NFS_NOSERVER */ Index: head/sys/ufs/ufs/ufs_vnops.c =================================================================== --- head/sys/ufs/ufs/ufs_vnops.c (revision 35822) +++ head/sys/ufs/ufs/ufs_vnops.c (revision 35823) @@ -1,2278 +1,2269 @@ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - * $Id: ufs_vnops.c,v 1.82 1998/04/04 13:26:20 phk Exp $ + * $Id: ufs_vnops.c,v 1.83 1998/04/17 22:37:19 des Exp $ */ #include "opt_quota.h" #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ufs_abortop __P((struct vop_abortop_args *)); static int ufs_access __P((struct vop_access_args *)); static int ufs_advlock __P((struct vop_advlock_args *)); static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *)); static int ufs_chown __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *)); static int ufs_close __P((struct vop_close_args *)); static int ufs_create __P((struct vop_create_args *)); static int ufs_getattr __P((struct vop_getattr_args *)); static int ufs_link __P((struct vop_link_args *)); static int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); static int ufs_missingop __P((struct vop_generic_args *ap)); static int ufs_mkdir __P((struct vop_mkdir_args *)); static int ufs_mknod __P((struct vop_mknod_args *)); static int ufs_mmap __P((struct vop_mmap_args *)); static int ufs_open __P((struct vop_open_args *)); static int ufs_pathconf __P((struct vop_pathconf_args *)); static int ufs_print __P((struct vop_print_args *)); static int ufs_readdir __P((struct vop_readdir_args *)); static int ufs_readlink __P((struct vop_readlink_args *)); static int ufs_remove __P((struct vop_remove_args *)); static int ufs_rename __P((struct vop_rename_args *)); static int ufs_rmdir __P((struct vop_rmdir_args *)); static int ufs_setattr __P((struct vop_setattr_args *)); static int ufs_strategy __P((struct vop_strategy_args *)); static int ufs_symlink __P((struct vop_symlink_args *)); static int ufs_whiteout __P((struct vop_whiteout_args *)); static int ufsfifo_close __P((struct vop_close_args *)); static int ufsfifo_read __P((struct vop_read_args *)); static int ufsfifo_write __P((struct vop_write_args *)); static int ufsspec_close __P((struct vop_close_args *)); static int ufsspec_read __P((struct vop_read_args *)); static int ufsspec_write __P((struct vop_write_args *)); union _qcvt { int64_t qcvt; int32_t val[2]; }; #define 
SETHIGH(q, h) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_HIGHWORD] = (h); \ (q) = tmp.qcvt; \ } #define SETLOW(q, l) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_LOWWORD] = (l); \ (q) = tmp.qcvt; \ } /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; */ #define ITIMES(ip) { \ struct timeval tv; \ getmicrotime(&tv); \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime = tv.tv_sec; \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime = tv.tv_sec; \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv.tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } /* * A virgin directory (no blushing please). */ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; /* * Create a regular file */ int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in * the inode cache. */ vput(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); *vpp = 0; return (0); } /* * Open called. * * Nothing to do. */ /* ARGSUSED */ int ufs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Files marked append-only must be opened for appending. */ if ((VTOI(ap->a_vp)->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) ITIMES(ip); simple_unlock(&vp->v_interlock); return (0); } int ufs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; register gid_t *gp; int i; #ifdef QUOTA int error; #endif /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. 
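 * After the read-only and IMMUTABLE checks (and the blanket grant to
 * uid 0), access is decided in the usual owner/group/other order: the
 * requested VREAD/VWRITE/VEXEC bits are translated into the matching
 * mode bits for whichever class the credential falls in, and all of
 * them must be present in i_mode.  For example, a VREAD|VWRITE request
 * from the file's owner needs both S_IRUSR and S_IWUSR set, otherwise
 * EACCES is returned.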
*/ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA if (error = getinoquota(ip)) return (error); #endif break; default: break; } } /* If immutable bit set, nobody gets to write it. */ if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) return (EPERM); /* Otherwise, user id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == ip->i_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->i_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* ARGSUSED */ int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); register struct vattr *vap = ap->a_vap; ITIMES(ip); /* * Copy from inode table */ vap->va_fsid = ip->i_dev; vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = (dev_t)ip->i_rdev; vap->va_size = ip->i_din.di_size; vap->va_atime.tv_sec = ip->i_atime; vap->va_atime.tv_nsec = ip->i_atimensec; vap->va_mtime.tv_sec = ip->i_mtime; vap->va_mtime.tv_nsec = ip->i_mtimensec; vap->va_ctime.tv_sec = ip->i_ctime; vap->va_ctime.tv_nsec = ip->i_ctimensec; vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; /* this doesn't belong here */ if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; else vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); vap->va_type = vp->v_type; vap->va_filerev = ip->i_modrev; return (0); } /* * Set attribute vnode op. called from several syscalls */ int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; struct timeval atimeval, mtimeval; int error; /* * Check for unsettable attributes. 
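 * Callers mark every field they do not want to change with VNOVAL
 * (or VNON for va_type), so any read-only field that arrives with a
 * real value means the caller asked to set it, and EINVAL is returned.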
*/ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser(cred, &p->p_acflag))) return (error); if (cred->cr_uid == 0) { if ((ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) && securelevel > 0) return (EPERM); ip->i_flags = vap->va_flags; } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); ip->i_flags &= SF_SETTABLE; ip->i_flags |= (vap->va_flags & UF_SETTABLE); } ip->i_flag |= IN_CHANGE; if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) return (error); } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } if (error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser(cred, &p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, cred, p)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; atimeval.tv_sec = vap->va_atime.tv_sec; atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; mtimeval.tv_sec = vap->va_mtime.tv_sec; mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; error = UFS_UPDATE(vp, &atimeval, &mtimeval, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); } VN_POLLEVENT(vp, POLLATTRIB); return (error); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, p) register struct vnode *vp; register int mode; register struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); int error; if (cred->cr_uid != ip->i_uid) { error = suser(cred, &p->p_acflag); if (error) return (error); } if (cred->cr_uid) { if (vp->v_type != VDIR && (mode & S_ISTXT)) return (EFTYPE); if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) return (EPERM); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; return (0); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. 
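 * With QUOTA enabled this is mostly a quota transfer: the block and
 * inode usage is first credited back to the old uid/gid, the ids are
 * swapped, and the same usage is charged to the new ones with
 * chkdq()/chkiq(); if the new owner is over quota the ids are rolled
 * back and the usage re-charged to the original owner.  Finally,
 * ISUID/ISGID are cleared when a non-root caller actually changes the
 * ids.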
*/ static int ufs_chown(vp, uid, gid, cred, p) register struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA register int i; long change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * If we don't own the file, are trying to change the owner * of the file, or are not a member of the target group, * the caller must be superuser or the call fails. */ if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) && (error = suser(cred, &p->p_acflag))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if (error = getinoquota(ip)) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = ip->i_blocks; (void) chkdq(ip, -change, cred, CHOWN); (void) chkiq(ip, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; ip->i_uid = uid; #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; ip->i_uid = ouid; if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ ip->i_flag |= IN_CHANGE; if (cred->cr_uid != 0 && (ouid != uid || ogid != gid)) ip->i_mode &= ~(ISUID | ISGID); return (0); } /* * Mmap a file * * NB Currently unsupported. 
*/ /* ARGSUSED */ int ufs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(dvp, POLLWRITE); out: - if (dvp == vp) - vrele(vp); - else - vput(vp); - vput(dvp); return (error); } /* * link vnode call */ int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct timeval tv; struct direct newdir; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (tdvp->v_mount != vp->v_mount) { VOP_ABORTOP(tdvp, cnp); error = EXDEV; goto out2; } if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(tdvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } if (ip->i_flags & (IMMUTABLE | APPEND)) { VOP_ABORTOP(tdvp, cnp); error = EPERM; goto out1; } ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) softdep_increase_linkcnt(ip); getmicrotime(&tv); error = UFS_UPDATE(vp, &tv, &tv, !DOINGSOFTDEP(vp)); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); } if (error) { ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; } zfree(namei_zone, cnp->cn_pnbuf); out1: if (tdvp != vp) VOP_UNLOCK(vp, 0, p); out2: VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(tdvp, POLLWRITE); - vput(tdvp); return (error); } /* * whiteout vnode call */ int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); break; case DELETE: /* remove an existing directory whiteout */ #ifdef DIAGNOSTIC if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } if (cnp->cn_flags & HASBUF) { zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_flags &= ~HASBUF; } return (error); } /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. 
Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; register struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; struct direct newdir; struct timeval tv; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. */ #ifdef UFS_RENAME_DEBUG printf("ufs_rename: fvp == tvp for directories\n"); #endif error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * Relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. 
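 * Since ufs_remove() no longer drops the directory and file vnodes in
 * this revision (its trailing vput()/vrele() calls were removed), the
 * references are released here after VOP_REMOVE() returns; the XXX
 * below marks this as a temporary echo of the old behavior.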
*/ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp == NULL) { #ifdef UFS_RENAME_DEBUG printf("ufs_rename: from name disappeared\n"); #endif return (ENOENT); } - return (VOP_REMOVE(fdvp, fvp, fcnp)); + error = VOP_REMOVE(fdvp, fvp, fcnp); + /* XXX - temporarily simulate previous rele behavior */ + if (fdvp == fvp) + vrele(fdvp); + else + vput(fdvp); + vput(fvp); + return (error); } if (error = vn_lock(fvp, LK_EXCLUSIVE, p)) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } VN_POLLEVENT(fdvp, POLLWRITE); vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) softdep_increase_linkcnt(ip); getmicrotime(&tv); if (error = UFS_UPDATE(fvp, &tv, &tv, !DOINGSOFTDEP(fvp))) { VOP_UNLOCK(fvp, 0, p); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ufs_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. 
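 * Moving a directory under a new parent adds a ".." reference to that
 * parent, so the parent's link count is bumped (and written out first)
 * before the new directory entry is created; the bump is undone if
 * ufs_direnter() fails.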
*/ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) softdep_increase_linkcnt(dp); error = UFS_UPDATE(tdvp, &tv, &tv, !DOINGSOFTDEP(tdvp)); if (error) goto bad; } ufs_makedirentry(ip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); if (error) { if (doingdirectory && newparent) { dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, &tv, &tv, 1); } goto bad; } VN_POLLEVENT(tdvp, POLLWRITE); vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { if ((xp->i_effnlink > 2) || !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = ufs_dirrewrite(dp, xp, ip->i_number, IFTODT(ip->i_mode), doingdirectory); if (error) goto bad; if (doingdirectory) { dp->i_effnlink--; dp->i_flag |= IN_CHANGE; xp->i_effnlink--; xp->i_flag |= IN_CHANGE; } VN_POLLEVENT(tdvp, POLLWRITE); if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * Truncate inode. The only stuff left in the directory * is "." and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ dp->i_nlink--; xp->i_nlink--; if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, tcnp->cn_cred, tcnp->cn_proc)) != 0) goto bad; } vput(tdvp); VN_POLLEVENT(tvp, POLLNLINK); /* XXX this right? */ vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; the IN_RENAME * flag ensures that it cannot be moved by another rename or removed * by a rmdir. 
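 * When the entry still refers to the same inode and the directory
 * gained a new parent, its ".." slot is rewritten to point at that
 * parent and the old parent is purged from the name cache before the
 * source name itself is removed.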
*/ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { xp->i_offset = mastertemplate.dot_reclen; ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* * Mkdir system call */ int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; struct timeval tv; int error, dmode; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); - vput(dvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); - vput(dvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). 
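 * The new directory starts with a link count of 2 ("." plus the entry
 * in its parent); the parent's own count is bumped further down to
 * account for the child's "..".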
*/ ip->i_effnlink = 2; ip->i_nlink = 2; if (DOINGSOFTDEP(tvp)) softdep_increase_linkcnt(ip); if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. */ dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) softdep_increase_linkcnt(dp); getmicrotime(&tv); error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(dvp)); if (error) goto bad; /* * Initialize directory with "." and ".." from static template. */ if (dvp->v_mount->mnt_maxsymlinklen > 0 ) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, B_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; ip->i_flag |= IN_CHANGE | IN_UPDATE; vnode_pager_setsize(tvp, (u_long)ip->i_size); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if ((error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp))) != 0) { (void)VOP_BWRITE(bp); goto bad; } VN_POLLEVENT(dvp, POLLWRITE); /* XXX right place? */ /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); bad: if (error == 0) { *ap->a_vpp = tvp; } else { dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); } out: zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); } /* * Rmdir system call. */ int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. */ error = 0; if (ip->i_flag & IN_RENAME) { error = EINVAL; goto out; } if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) goto out; VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK); cache_purge(dvp); /* * Truncate inode. The only stuff left in the directory is "." and * "..". The "." reference is inconsequential since we are quashing * it. 
We have removed the "." reference and the reference in the * parent directory, but there may be other hard links. So, * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no * new entries are made. The soft dependency code will arrange to * do these operations after the parent directory entry has been * deleted on disk, so when running with that code we avoid doing * them now. */ dp->i_effnlink--; dp->i_flag |= IN_CHANGE; ip->i_effnlink--; ip->i_flag |= IN_CHANGE; if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; ip->i_nlink--; error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, cnp->cn_proc); } cache_purge(vp); out: - vput(dvp); VN_POLLEVENT(vp, POLLNLINK); - vput(vp); return (error); } /* * symlink -- make a symbolic link */ int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, (struct proc *)0); vput(vp); return (error); } /* * Vnode op for reading directories. * * The routine below assumes that the on-disk format of a directory * is the same as that defined by . If the on-disk * format changes, then it will be necessary to do a conversion * from the on-disk format that read returns to the format defined * by . */ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *ncookies; u_long **a_cookies; } */ *ap; { register struct uio *uio = ap->a_uio; int error; size_t count, lost; off_t off; if (ap->a_ncookies != NULL) /* * Ensure that the block is aligned. The caller can use * the cookies to determine where in the block to start. */ uio->uio_offset &= ~(DIRBLKSIZ - 1); off = uio->uio_offset; count = uio->uio_resid; /* Make sure we don't return partial entries. 
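 * The transfer is trimmed so that it ends exactly on a DIRBLKSIZ
 * boundary.  For example, with the usual DIRBLKSIZ of 512, a request
 * starting at offset 0 with 700 bytes of residual is cut back to 512;
 * the 188 trimmed bytes are restored to uio_resid at the end.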
*/ count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); if (count <= 0) return (EINVAL); lost = uio->uio_resid - count; uio->uio_resid = count; uio->uio_iov->iov_len = count; # if (BYTE_ORDER == LITTLE_ENDIAN) if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); } else { struct dirent *dp, *edp; struct uio auio; struct iovec aiov; caddr_t dirbuf; int readcnt; u_char tmp; auio = *uio; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; aiov.iov_len = count; MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { tmp = dp->d_namlen; dp->d_namlen = dp->d_type; dp->d_type = tmp; if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, uio); } FREE(dirbuf, M_TEMP); } # else error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); # endif if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; struct dirent* dpEnd; struct dirent* dp; int ncookies; u_long *cookies; u_long *cookiep; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ufs_readdir: unexpected uio from NFS server"); dpStart = (struct dirent *) (uio->uio_iov->iov_base - (uio->uio_offset - off)); dpEnd = (struct dirent *) uio->uio_iov->iov_base; for (dp = dpStart, ncookies = 0; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) ncookies++; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); for (dp = dpStart, cookiep = cookies; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { off += dp->d_reclen; *cookiep++ = (u_long) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } uio->uio_resid += lost; if (ap->a_eofflag) *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); int isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || (ip->i_din.di_blocks == 0)) { /* XXX - for old fastlink support */ uiomove((char *)ip->i_shortlink, isize, ap->a_uio); return (0); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually * done. If a buffer has been saved in anticipation of a CREATE, delete it. */ /* ARGSUSED */ int ufs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. 
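 * A buffer whose b_blkno still equals b_lblkno has not been mapped
 * yet; VOP_BMAP() fills in the physical block number.  A result of -1
 * denotes a hole, in which case the buffer is cleared and completed
 * without ever reaching the device.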
*/ int ufs_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct vnode *vp = bp->b_vp; register struct inode *ip; int error; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("ufs_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { biodone(bp); return (0); } vp = ip->i_devvp; bp->b_dev = vp->v_rdev; VOCALL (vp->v_op, VOFFSET(vop_strategy), ap); return (0); } /* * Print out the contents of an inode. */ int ufs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); printf("tag VT_UFS, ino %ld, on dev %d, %d", ip->i_number, major(ip->i_dev), minor(ip->i_dev)); if (vp->v_type == VFIFO) fifo_printinfo(vp); lockmgr_printinfo(&ip->i_lock); printf("\n"); return (0); } /* * Read wrapper for special devices. */ int ufsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { /* * Set access flag. */ if (!(ap->a_vp->v_mount->mnt_flag & MNT_NOATIME)) VTOI(ap->a_vp)->i_flag |= IN_ACCESS; return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ int ufsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { /* * Set update and change flags. */ VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the inode then do device close. */ int ufsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); simple_lock(&vp->v_interlock); if (ap->a_vp->v_usecount > 1) ITIMES(ip); simple_unlock(&vp->v_interlock); return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifo's */ int ufsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { /* * Set access flag. */ if (!(ap->a_vp->v_mount->mnt_flag & MNT_NOATIME)) VTOI(ap->a_vp)->i_flag |= IN_ACCESS; return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifo's. */ int ufsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { /* * Set update and change flags. */ VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifo's. * * Update the times on the inode then do device close. */ int ufsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); simple_lock(&vp->v_interlock); if (ap->a_vp->v_usecount > 1) ITIMES(ip); simple_unlock(&vp->v_interlock); return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Return POSIX pathconf information applicable to ufs filesystems. 
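 * These static answers back VOP_PATHCONF() callers such as the
 * pathconf(2) syscall and the NFSv3 PATHCONF service; any name not
 * handled in the switch gets EINVAL.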
*/ int ufs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 1; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Advisory record locking support */ int ufs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct inode *ip = VTOI(ap->a_vp); return (lf_advlock(ap, &(ip->i_lockf), ip->i_size)); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(mntp, specops, fifoops, vpp) struct mount *mntp; vop_t **specops; vop_t **fifoops; struct vnode **vpp; { struct inode *ip; struct vnode *vp, *nvp; struct timeval tv; vp = *vpp; ip = VTOI(vp); switch(vp->v_type = IFTOVT(ip->i_mode)) { case VCHR: case VBLK: vp->v_op = specops; nvp = checkalias(vp, ip->i_rdev, mntp); if (nvp) { /* * Discard unneeded vnode, but save its inode. * Note that the lock is carried over in the inode * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; case VFIFO: vp->v_op = fifoops; break; default: break; } if (ip->i_number == ROOTINO) vp->v_flag |= VROOT; /* * Initialize modrev times */ getmicroruntime(&tv); SETHIGH(ip->i_modrev, tv.tv_sec); SETLOW(ip->i_modrev, tv.tv_usec * 4294); *vpp = vp; return (0); } /* * Allocate a new inode. */ int ufs_makeinode(mode, dvp, vpp, cnp) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; { register struct inode *ip, *pdir; struct direct newdir; struct timeval tv; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_makeinode: no name"); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) { zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); return (error); } ip = VTOI(tvp); ip->i_gid = pdir->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. 
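 * In other words, under SUIDDIR the new file is owned by the
 * directory's owner, so the quota check below must charge that owner
 * rather than the creating process; the throwaway ucred carries the
 * victim's uid and gid just long enough for chkiq() to see them.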
*/ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); - vput(dvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); - vput(dvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; if (DOINGSOFTDEP(tvp)) softdep_increase_linkcnt(ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && suser(cnp->cn_cred, NULL)) ip->i_mode &= ~ISGID; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Make sure inode goes to disk before directory entry. */ getmicrotime(&tv); error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp)); if (error) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ zfree(namei_zone, cnp->cn_pnbuf); - vput(dvp); ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); return (error); } static int ufs_missingop(ap) struct vop_generic_args *ap; { panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name); return (EOPNOTSUPP); } /* Global vfs data structures for ufs. 
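 * Each table below pairs a vop descriptor with its handler.  The
 * vop_default entry is the fallback for anything not listed, while
 * ufs_missingop marks operations (fsync, read, write, ...) that the
 * filesystem built on top of this code is expected to supply itself.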
/* Global vfs data structures for ufs. */
static vop_t **ufs_vnodeop_p;
static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_read_desc,		(vop_t *) ufs_missingop },
	{ &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
	{ &vop_write_desc,		(vop_t *) ufs_missingop },
	{ &vop_abortop_desc,		(vop_t *) ufs_abortop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
	{ &vop_close_desc,		(vop_t *) ufs_close },
	{ &vop_create_desc,		(vop_t *) ufs_create },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_link_desc,		(vop_t *) ufs_link },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
	{ &vop_mmap_desc,		(vop_t *) ufs_mmap },
	{ &vop_open_desc,		(vop_t *) ufs_open },
	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_remove_desc,		(vop_t *) ufs_remove },
	{ &vop_rename_desc,		(vop_t *) ufs_rename },
	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_vnodeop_opv_desc =
	{ &ufs_vnodeop_p, ufs_vnodeop_entries };

static vop_t **ufs_specop_p;
static struct vnodeopv_entry_desc ufs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsspec_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsspec_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_write_desc,		(vop_t *) ufsspec_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_specop_opv_desc =
	{ &ufs_specop_p, ufs_specop_entries };

static vop_t **ufs_fifoop_p;
static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_fifoop_opv_desc =
	{ &ufs_fifoop_p, ufs_fifoop_entries };
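/*
 * Editorial sketch (not part of this file): each table above pairs an
 * operation descriptor with the handler to run for that operation; at
 * system initialization the registered vnodeopv_desc is compiled into a
 * flat operations vector (ufs_vnodeop_p and friends).  A VOP_*() call on
 * a ufs vnode then reduces to indexing that vector, roughly:
 *
 *	struct vop_getattr_args a;
 *	a.a_desc = VDESC(vop_getattr);
 *	a.a_vp = vp;
 *	a.a_vap = vap;
 *	a.a_cred = cred;
 *	a.a_p = p;
 *	error = VCALL(vp, VOFFSET(vop_getattr), &a);
 *
 * which for a ufs vnode lands in ufs_getattr() through ufs_vnodeop_p;
 * operations not listed in a table fall back to its vop_default_desc
 * handler.  (VDESC/VOFFSET/VCALL are the generic vnode-interface macros;
 * the exact expansion lives in sys/vnode.h.)
 */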
VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);

int
ufs_vnoperate(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}

int
ufs_vnoperatefifo(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
}

int
ufs_vnoperatespec(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
}
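/*
 * Editorial sketch (not part of this diff): ufs_vnoperate() and its
 * spec/fifo variants exist so a "child" filesystem can fall back on the
 * ufs vectors for anything it does not override.  For example, FFS's
 * regular-file table looks roughly like:
 *
 *	static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
 *		{ &vop_default_desc,	(vop_t *) ufs_vnoperate },
 *		{ &vop_fsync_desc,	(vop_t *) ffs_fsync },
 *		{ &vop_read_desc,	(vop_t *) ffs_read },
 *		{ &vop_write_desc,	(vop_t *) ffs_write },
 *		...
 *		{ NULL, NULL }
 *	};
 *
 * so operations FFS does not list are re-dispatched through
 * ufs_vnodeop_p, and the handful a child is required to supply itself
 * (fsync, read, write, reallocblks) are trapped by ufs_missingop() if
 * the child forgets one.
 */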