Index: head/sys/fs/devfs/devfs_vnops.c =================================================================== --- head/sys/fs/devfs/devfs_vnops.c (revision 183214) +++ head/sys/fs/devfs/devfs_vnops.c (revision 183215) @@ -1,1502 +1,1501 @@ /*- * Copyright (c) 2000-2004 * Poul-Henning Kamp. All rights reserved. * Copyright (c) 1989, 1992-1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 * * $FreeBSD$ */ /* * TODO: * remove empty directories * mkdir: want it ? */ #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct vop_vector devfs_vnodeops; static struct vop_vector devfs_specops; static struct fileops devfs_ops_f; #include #include #include static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data"); struct mtx devfs_de_interlock; MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF); struct sx clone_drain_lock; SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock"); struct mtx cdevpriv_mtx; MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF); static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp) { *dswp = devvn_refthread(fp->f_vnode, devp); if (*devp != fp->f_data) { if (*dswp != NULL) dev_relthread(*devp); return (ENXIO); } KASSERT((*devp)->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); if (*dswp == NULL) return (ENXIO); curthread->td_fpop = fp; return (0); } int devfs_get_cdevpriv(void **datap) { struct file *fp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (EBADF); p = fp->f_cdevpriv; if (p != NULL) { error = 0; *datap = p->cdpd_data; } else error = ENOENT; return (error); } int devfs_set_cdevpriv(void *priv, cdevpriv_dtr_t priv_dtr) { struct file *fp; struct cdev_priv *cdp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (ENOENT); cdp = cdev2priv((struct cdev *)fp->f_data); p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); p->cdpd_data = priv; p->cdpd_dtr = priv_dtr; p->cdpd_fp = fp; mtx_lock(&cdevpriv_mtx); if (fp->f_cdevpriv == NULL) { LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); fp->f_cdevpriv = p; mtx_unlock(&cdevpriv_mtx); error = 0; } else { mtx_unlock(&cdevpriv_mtx); free(p, M_CDEVPDATA); error = EBUSY; } return (error); } void devfs_destroy_cdevpriv(struct cdev_privdata *p) { mtx_assert(&cdevpriv_mtx, MA_OWNED); p->cdpd_fp->f_cdevpriv = NULL; LIST_REMOVE(p, cdpd_list); mtx_unlock(&cdevpriv_mtx); (p->cdpd_dtr)(p->cdpd_data); free(p, M_CDEVPDATA); } void devfs_fpdrop(struct file *fp) { struct cdev_privdata *p; mtx_lock(&cdevpriv_mtx); if ((p = fp->f_cdevpriv) == NULL) { mtx_unlock(&cdevpriv_mtx); return; } devfs_destroy_cdevpriv(p); } void devfs_clear_cdevpriv(void) { struct file *fp; fp = curthread->td_fpop; if (fp == NULL) return; devfs_fpdrop(fp); } /* * Construct the fully qualified path name relative to the mountpoint */ static char * devfs_fqpn(char *buf, struct vnode *dvp, struct componentname *cnp) { int i; struct devfs_dirent *de, *dd; struct devfs_mount *dmp; dmp = VFSTODEVFS(dvp->v_mount); dd = dvp->v_data; i = SPECNAMELEN; buf[i] = '\0'; i -= cnp->cn_namelen; if (i < 0) return (NULL); bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen); de = dd; while (de != dmp->dm_rootdir) { i--; if (i < 0) return (NULL); buf[i] = '/'; i -= de->de_dirent->d_namlen; if (i < 0) return (NULL); bcopy(de->de_dirent->d_name, buf + i, de->de_dirent->d_namlen); de = TAILQ_FIRST(&de->de_dlist); /* "." */ de = TAILQ_NEXT(de, de_list); /* ".." */ de = de->de_dir; } return (buf + i); } static int devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp, struct devfs_dirent *de) { int not_found; not_found = 0; if (de->de_flags & DE_DOOMED) not_found = 1; if (DEVFS_DE_DROP(de)) { KASSERT(not_found == 1, ("DEVFS de dropped but not doomed")); devfs_dirent_free(de); } if (DEVFS_DMP_DROP(dmp)) { KASSERT(not_found == 1, ("DEVFS mount struct freed before dirent")); not_found = 2; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); } if (not_found == 1 || (drop_dm_lock && not_found != 2)) sx_unlock(&dmp->dm_lock); return (not_found); } static void devfs_insmntque_dtr(struct vnode *vp, void *arg) { struct devfs_dirent *de; de = (struct devfs_dirent *)arg; mtx_lock(&devfs_de_interlock); vp->v_data = NULL; de->de_vnode = NULL; mtx_unlock(&devfs_de_interlock); vgone(vp); vput(vp); } /* * devfs_allocv shall be entered with dmp->dm_lock held, and it drops * it on return. */ int devfs_allocv(struct devfs_dirent *de, struct mount *mp, struct vnode **vpp, struct thread *td) { int error; struct vnode *vp; struct cdev *dev; struct devfs_mount *dmp; KASSERT(td == curthread, ("devfs_allocv: td != curthread")); dmp = VFSTODEVFS(mp); if (de->de_flags & DE_DOOMED) { sx_xunlock(&dmp->dm_lock); return (ENOENT); } DEVFS_DE_HOLD(de); DEVFS_DMP_HOLD(dmp); mtx_lock(&devfs_de_interlock); vp = de->de_vnode; if (vp != NULL) { VI_LOCK(vp); mtx_unlock(&devfs_de_interlock); sx_xunlock(&dmp->dm_lock); error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); sx_xlock(&dmp->dm_lock); if (devfs_allocv_drop_refs(0, dmp, de)) { if (error == 0) vput(vp); return (ENOENT); } else if (error) { sx_xunlock(&dmp->dm_lock); return (error); } sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } mtx_unlock(&devfs_de_interlock); if (de->de_dirent->d_type == DT_CHR) { if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) { devfs_allocv_drop_refs(1, dmp, de); return (ENOENT); } dev = &de->de_cdp->cdp_c; } else { dev = NULL; } error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp); if (error != 0) { devfs_allocv_drop_refs(1, dmp, de); printf("devfs_allocv: failed to allocate new vnode\n"); return (error); } if (de->de_dirent->d_type == DT_CHR) { vp->v_type = VCHR; VI_LOCK(vp); dev_lock(); dev_refl(dev); /* XXX: v_rdev should be protect by vnode lock */ vp->v_rdev = dev; KASSERT(vp->v_usecount == 1, ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount)); dev->si_usecount += vp->v_usecount; dev_unlock(); VI_UNLOCK(vp); vp->v_op = &devfs_specops; } else if (de->de_dirent->d_type == DT_DIR) { vp->v_type = VDIR; } else if (de->de_dirent->d_type == DT_LNK) { vp->v_type = VLNK; } else { vp->v_type = VBAD; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS); mtx_lock(&devfs_de_interlock); vp->v_data = de; de->de_vnode = vp; mtx_unlock(&devfs_de_interlock); error = insmntque1(vp, mp, devfs_insmntque_dtr, de); if (error != 0) { (void) devfs_allocv_drop_refs(1, dmp, de); return (error); } if (devfs_allocv_drop_refs(0, dmp, de)) { vput(vp); return (ENOENT); } #ifdef MAC mac_devfs_vnode_associate(mp, de, vp); #endif sx_xunlock(&dmp->dm_lock); *vpp = vp; return (0); } static int devfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; struct devfs_dirent *de; int error; de = vp->v_data; if (vp->v_type == VDIR) de = de->de_dir; error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid, ap->a_mode, ap->a_cred, NULL); if (!error) return (error); if (error != EACCES) return (error); /* We do, however, allow access to the controlling terminal */ if (!(ap->a_td->td_proc->p_flag & P_CONTROLT)) return (error); if (ap->a_td->td_proc->p_session->s_ttyvp == de->de_vnode) return (0); return (error); } /* ARGSUSED */ static int devfs_advlock(struct vop_advlock_args *ap) { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* ARGSUSED */ static int devfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp, *oldvp; struct thread *td = ap->a_td; struct cdev *dev = vp->v_rdev; struct cdevsw *dsw; int vp_locked, error; /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ oldvp = NULL; sx_xlock(&proctree_lock); if (td && vp == td->td_proc->p_session->s_ttyvp) { SESS_LOCK(td->td_proc->p_session); VI_LOCK(vp); if (count_dev(dev) == 2 && (vp->v_iflag & VI_DOOMED) == 0) { td->td_proc->p_session->s_ttyvp = NULL; oldvp = vp; } VI_UNLOCK(vp); SESS_UNLOCK(td->td_proc->p_session); } sx_xunlock(&proctree_lock); if (oldvp != NULL) vrele(oldvp); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ dsw = dev_refthread(dev); if (dsw == NULL) return (ENXIO); VI_LOCK(vp); if (vp->v_iflag & VI_DOOMED) { /* Forced close. */ } else if (dsw->d_flags & D_TRACKCLOSE) { /* Keep device updated on status. */ } else if (count_dev(dev) > 1) { VI_UNLOCK(vp); dev_relthread(dev); return (0); } vholdl(vp); VI_UNLOCK(vp); vp_locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); KASSERT(dev->si_refcount > 0, ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev))); error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td); dev_relthread(dev); vn_lock(vp, vp_locked | LK_RETRY); vdrop(vp); return (error); } static int devfs_close_f(struct file *fp, struct thread *td) { int error; curthread->td_fpop = fp; error = vnops.fo_close(fp, td); curthread->td_fpop = NULL; return (error); } /* ARGSUSED */ static int devfs_fsync(struct vop_fsync_args *ap) { if (!vn_isdisk(ap->a_vp, NULL)) return (0); return (vop_stdfsync(ap)); } static int devfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; int error = 0; struct devfs_dirent *de; struct cdev *dev; de = vp->v_data; KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp)); if (vp->v_type == VDIR) { de = de->de_dir; KASSERT(de != NULL, ("Null dir dirent in devfs_getattr vp=%p", vp)); } - bzero((caddr_t) vap, sizeof(*vap)); - vattr_null(vap); vap->va_uid = de->de_uid; vap->va_gid = de->de_gid; vap->va_mode = de->de_mode; if (vp->v_type == VLNK) vap->va_size = strlen(de->de_symlink); else if (vp->v_type == VDIR) vap->va_size = vap->va_bytes = DEV_BSIZE; else vap->va_size = 0; if (vp->v_type != VDIR) vap->va_bytes = 0; vap->va_blocksize = DEV_BSIZE; vap->va_type = vp->v_type; #define fix(aa) \ do { \ if ((aa).tv_sec <= 3600) { \ (aa).tv_sec = boottime.tv_sec; \ (aa).tv_nsec = boottime.tv_usec * 1000; \ } \ } while (0) if (vp->v_type != VCHR) { fix(de->de_atime); vap->va_atime = de->de_atime; fix(de->de_mtime); vap->va_mtime = de->de_mtime; fix(de->de_ctime); vap->va_ctime = de->de_ctime; } else { dev = vp->v_rdev; fix(dev->si_atime); vap->va_atime = dev->si_atime; fix(dev->si_mtime); vap->va_mtime = dev->si_mtime; fix(dev->si_ctime); vap->va_ctime = dev->si_ctime; vap->va_rdev = cdev2priv(dev)->cdp_inode; } vap->va_gen = 0; vap->va_flags = 0; + vap->va_filerev = 0; vap->va_nlink = de->de_links; vap->va_fileid = de->de_inode; return (error); } /* ARGSUSED */ static int devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; struct vnode *vp; struct vnode *vpold; int error, i; const char *p; struct fiodgname_arg *fgn; error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); if (com == FIODTYPE) { *(int *)data = dsw->d_flags & D_TYPEMASK; td->td_fpop = NULL; dev_relthread(dev); return (0); } else if (com == FIODGNAME) { fgn = data; p = devtoname(dev); i = strlen(p) + 1; if (i > fgn->len) error = EINVAL; else error = copyout(p, fgn->buf, i); td->td_fpop = NULL; dev_relthread(dev); return (error); } error = dsw->d_ioctl(dev, com, data, fp->f_flag, td); td->td_fpop = NULL; dev_relthread(dev); if (error == ENOIOCTL) error = ENOTTY; if (error == 0 && com == TIOCSCTTY) { vp = fp->f_vnode; /* Do nothing if reassigning same control tty */ sx_slock(&proctree_lock); if (td->td_proc->p_session->s_ttyvp == vp) { sx_sunlock(&proctree_lock); return (0); } vpold = td->td_proc->p_session->s_ttyvp; VREF(vp); SESS_LOCK(td->td_proc->p_session); td->td_proc->p_session->s_ttyvp = vp; SESS_UNLOCK(td->td_proc->p_session); sx_sunlock(&proctree_lock); /* Get rid of reference to old control tty */ if (vpold) vrele(vpold); } return (error); } /* ARGSUSED */ static int devfs_kqfilter_f(struct file *fp, struct knote *kn) { struct cdev *dev; struct cdevsw *dsw; int error; error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); error = dsw->d_kqfilter(dev, kn); curthread->td_fpop = NULL; dev_relthread(dev); return (error); } static int devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock) { struct componentname *cnp; struct vnode *dvp, **vpp; struct thread *td; struct devfs_dirent *de, *dd; struct devfs_dirent **dde; struct devfs_mount *dmp; struct cdev *cdev; int error, flags, nameiop; char specname[SPECNAMELEN + 1], *pname; cnp = ap->a_cnp; vpp = ap->a_vpp; dvp = ap->a_dvp; pname = cnp->cn_nameptr; td = cnp->cn_thread; flags = cnp->cn_flags; nameiop = cnp->cn_nameiop; dmp = VFSTODEVFS(dvp->v_mount); dd = dvp->v_data; *vpp = NULLVP; if ((flags & ISLASTCN) && nameiop == RENAME) return (EOPNOTSUPP); if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) return (EIO); error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); if (error) return (error); if (cnp->cn_namelen == 1 && *pname == '.') { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); *vpp = dvp; VREF(dvp); return (0); } if (flags & ISDOTDOT) { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); VOP_UNLOCK(dvp, 0); de = TAILQ_FIRST(&dd->de_dlist); /* "." */ de = TAILQ_NEXT(de, de_list); /* ".." */ de = de->de_dir; error = devfs_allocv(de, dvp->v_mount, vpp, td); *dm_unlock = 0; vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); return (error); } DEVFS_DMP_HOLD(dmp); devfs_populate(dmp); if (DEVFS_DMP_DROP(dmp)) { *dm_unlock = 0; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ENOENT); } dd = dvp->v_data; de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen); while (de == NULL) { /* While(...) so we can use break */ if (nameiop == DELETE) return (ENOENT); /* * OK, we didn't have an entry for the name we were asked for * so we try to see if anybody can create it on demand. */ pname = devfs_fqpn(specname, dvp, cnp); if (pname == NULL) break; cdev = NULL; DEVFS_DMP_HOLD(dmp); sx_xunlock(&dmp->dm_lock); sx_slock(&clone_drain_lock); EVENTHANDLER_INVOKE(dev_clone, td->td_ucred, pname, strlen(pname), &cdev); sx_sunlock(&clone_drain_lock); sx_xlock(&dmp->dm_lock); if (DEVFS_DMP_DROP(dmp)) { *dm_unlock = 0; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ENOENT); } if (cdev == NULL) break; DEVFS_DMP_HOLD(dmp); devfs_populate(dmp); if (DEVFS_DMP_DROP(dmp)) { *dm_unlock = 0; sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ENOENT); } dev_lock(); dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx]; if (dde != NULL && *dde != NULL) de = *dde; dev_unlock(); dev_rel(cdev); break; } if (de == NULL || de->de_flags & DE_WHITEOUT) { if ((nameiop == CREATE || nameiop == RENAME) && (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) { cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } return (ENOENT); } if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) return (error); if (*vpp == dvp) { VREF(dvp); *vpp = dvp; return (0); } } error = devfs_allocv(de, dvp->v_mount, vpp, td); *dm_unlock = 0; return (error); } static int devfs_lookup(struct vop_lookup_args *ap) { int j; struct devfs_mount *dmp; int dm_unlock; dmp = VFSTODEVFS(ap->a_dvp->v_mount); dm_unlock = 1; sx_xlock(&dmp->dm_lock); j = devfs_lookupx(ap, &dm_unlock); if (dm_unlock == 1) sx_xunlock(&dmp->dm_lock); return (j); } static int devfs_mknod(struct vop_mknod_args *ap) { struct componentname *cnp; struct vnode *dvp, **vpp; struct thread *td; struct devfs_dirent *dd, *de; struct devfs_mount *dmp; int error; /* * The only type of node we should be creating here is a * character device, for anything else return EOPNOTSUPP. */ if (ap->a_vap->va_type != VCHR) return (EOPNOTSUPP); dvp = ap->a_dvp; dmp = VFSTODEVFS(dvp->v_mount); cnp = ap->a_cnp; vpp = ap->a_vpp; td = cnp->cn_thread; dd = dvp->v_data; error = ENOENT; sx_xlock(&dmp->dm_lock); TAILQ_FOREACH(de, &dd->de_dlist, de_list) { if (cnp->cn_namelen != de->de_dirent->d_namlen) continue; if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name, de->de_dirent->d_namlen) != 0) continue; if (de->de_flags & DE_WHITEOUT) break; goto notfound; } if (de == NULL) goto notfound; de->de_flags &= ~DE_WHITEOUT; error = devfs_allocv(de, dvp->v_mount, vpp, td); return (error); notfound: sx_xunlock(&dmp->dm_lock); return (error); } /* ARGSUSED */ static int devfs_open(struct vop_open_args *ap) { struct thread *td = ap->a_td; struct vnode *vp = ap->a_vp; struct cdev *dev = vp->v_rdev; struct file *fp = ap->a_fp; int error; struct cdevsw *dsw; if (vp->v_type == VBLK) return (ENXIO); if (dev == NULL) return (ENXIO); /* Make this field valid before any I/O in d_open. */ if (dev->si_iosize_max == 0) dev->si_iosize_max = DFLTPHYS; dsw = dev_refthread(dev); if (dsw == NULL) return (ENXIO); /* XXX: Special casing of ttys for deadfs. Probably redundant. */ if (dsw->d_flags & D_TTY) vp->v_vflag |= VV_ISTTY; VOP_UNLOCK(vp, 0); if (fp != NULL) { td->td_fpop = fp; fp->f_data = dev; } if (dsw->d_fdopen != NULL) error = dsw->d_fdopen(dev, ap->a_mode, td, fp); else error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td); td->td_fpop = NULL; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); dev_relthread(dev); if (error) return (error); #if 0 /* /dev/console */ KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp")); #else if(fp == NULL) return (error); #endif if (fp->f_ops == &badfileops) finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f); return (error); } static int devfs_pathconf(struct vop_pathconf_args *ap) { switch (ap->a_name) { case _PC_MAC_PRESENT: #ifdef MAC /* * If MAC is enabled, devfs automatically supports * trivial non-persistant label storage. */ *ap->a_retval = 1; #else *ap->a_retval = 0; #endif return (0); default: return (vop_stdpathconf(ap)); } /* NOTREACHED */ } /* ARGSUSED */ static int devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; int error; error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); error = dsw->d_poll(dev, events, td); curthread->td_fpop = NULL; dev_relthread(dev); return(error); } /* * Print out the contents of a special device vnode. */ static int devfs_print(struct vop_print_args *ap) { printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev)); return (0); } /* ARGSUSED */ static int devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int ioflag, error, resid; struct cdevsw *dsw; error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); resid = uio->uio_resid; ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; if ((flags & FOF_OFFSET) == 0) uio->uio_offset = fp->f_offset; error = dsw->d_read(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) vfs_timestamp(&dev->si_atime); curthread->td_fpop = NULL; dev_relthread(dev); if ((flags & FOF_OFFSET) == 0) fp->f_offset = uio->uio_offset; fp->f_nextoff = uio->uio_offset; return (error); } static int devfs_readdir(struct vop_readdir_args *ap) { int error; struct uio *uio; struct dirent *dp; struct devfs_dirent *dd; struct devfs_dirent *de; struct devfs_mount *dmp; off_t off, oldoff; int *tmp_ncookies = NULL; if (ap->a_vp->v_type != VDIR) return (ENOTDIR); uio = ap->a_uio; if (uio->uio_offset < 0) return (EINVAL); /* * XXX: This is a temporary hack to get around this filesystem not * supporting cookies. We store the location of the ncookies pointer * in a temporary variable before calling vfs_subr.c:vfs_read_dirent() * and set the number of cookies to 0. We then set the pointer to * NULL so that vfs_read_dirent doesn't try to call realloc() on * ap->a_cookies. Later in this function, we restore the ap->a_ncookies * pointer to its original location before returning to the caller. */ if (ap->a_ncookies != NULL) { tmp_ncookies = ap->a_ncookies; *ap->a_ncookies = 0; ap->a_ncookies = NULL; } dmp = VFSTODEVFS(ap->a_vp->v_mount); sx_xlock(&dmp->dm_lock); DEVFS_DMP_HOLD(dmp); devfs_populate(dmp); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (EIO); } error = 0; de = ap->a_vp->v_data; off = 0; oldoff = uio->uio_offset; TAILQ_FOREACH(dd, &de->de_dlist, de_list) { KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); if (dd->de_flags & DE_WHITEOUT) continue; if (dd->de_dirent->d_type == DT_DIR) de = dd->de_dir; else de = dd; dp = dd->de_dirent; if (dp->d_reclen > uio->uio_resid) break; dp->d_fileno = de->de_inode; if (off >= uio->uio_offset) { error = vfs_read_dirent(ap, dp, off); if (error) break; } off += dp->d_reclen; } sx_xunlock(&dmp->dm_lock); uio->uio_offset = off; /* * Restore ap->a_ncookies if it wasn't originally NULL in the first * place. */ if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (error); } static int devfs_readlink(struct vop_readlink_args *ap) { struct devfs_dirent *de; de = ap->a_vp->v_data; return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio)); } static int devfs_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct devfs_dirent *de; struct cdev *dev; mtx_lock(&devfs_de_interlock); de = vp->v_data; if (de != NULL) { de->de_vnode = NULL; vp->v_data = NULL; } mtx_unlock(&devfs_de_interlock); vnode_destroy_vobject(vp); VI_LOCK(vp); dev_lock(); dev = vp->v_rdev; vp->v_rdev = NULL; if (dev == NULL) { dev_unlock(); VI_UNLOCK(vp); return (0); } dev->si_usecount -= vp->v_usecount; dev_unlock(); VI_UNLOCK(vp); dev_rel(dev); return (0); } static int devfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct devfs_dirent *dd; struct devfs_dirent *de; struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount); sx_xlock(&dmp->dm_lock); dd = ap->a_dvp->v_data; de = vp->v_data; if (de->de_cdp == NULL) { TAILQ_REMOVE(&dd->de_dlist, de, de_list); devfs_delete(dmp, de, 1); } else { de->de_flags |= DE_WHITEOUT; } sx_xunlock(&dmp->dm_lock); return (0); } /* * Revoke is called on a tty when a terminal session ends. The vnode * is orphaned by setting v_op to deadfs so we need to let go of it * as well so that we create a new one next time around. * */ static int devfs_revoke(struct vop_revoke_args *ap) { struct vnode *vp = ap->a_vp, *vp2; struct cdev *dev; struct cdev_priv *cdp; struct devfs_dirent *de; int i; KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL")); dev = vp->v_rdev; cdp = cdev2priv(dev); dev_lock(); cdp->cdp_inuse++; dev_unlock(); vhold(vp); vgone(vp); vdrop(vp); VOP_UNLOCK(vp,0); loop: for (;;) { mtx_lock(&devfs_de_interlock); dev_lock(); vp2 = NULL; for (i = 0; i <= cdp->cdp_maxdirent; i++) { de = cdp->cdp_dirents[i]; if (de == NULL) continue; vp2 = de->de_vnode; if (vp2 != NULL) { dev_unlock(); VI_LOCK(vp2); mtx_unlock(&devfs_de_interlock); if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, curthread)) goto loop; vhold(vp2); vgone(vp2); vdrop(vp2); vput(vp2); break; } } if (vp2 != NULL) { continue; } dev_unlock(); mtx_unlock(&devfs_de_interlock); break; } dev_lock(); cdp->cdp_inuse--; if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) { TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); dev_unlock(); dev_rel(&cdp->cdp_c); } else dev_unlock(); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); return (0); } static int devfs_rioctl(struct vop_ioctl_args *ap) { int error; struct devfs_mount *dmp; dmp = VFSTODEVFS(ap->a_vp->v_mount); sx_xlock(&dmp->dm_lock); DEVFS_DMP_HOLD(dmp); devfs_populate(dmp); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ENOENT); } error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td); sx_xunlock(&dmp->dm_lock); return (error); } static int devfs_rread(struct vop_read_args *ap) { if (ap->a_vp->v_type != VDIR) return (EINVAL); return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL)); } static int devfs_setattr(struct vop_setattr_args *ap) { struct devfs_dirent *de; struct vattr *vap; struct vnode *vp; struct thread *td; int c, error; uid_t uid; gid_t gid; vap = ap->a_vap; vp = ap->a_vp; td = curthread; if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_flags != VNOVAL && vap->va_flags != 0) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } de = vp->v_data; if (vp->v_type == VDIR) de = de->de_dir; error = c = 0; if (vap->va_uid == (uid_t)VNOVAL) uid = de->de_uid; else uid = vap->va_uid; if (vap->va_gid == (gid_t)VNOVAL) gid = de->de_gid; else gid = vap->va_gid; if (uid != de->de_uid || gid != de->de_gid) { if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid || (gid != de->de_gid && !groupmember(gid, ap->a_cred))) { error = priv_check(td, PRIV_VFS_CHOWN); if (error) return (error); } de->de_uid = uid; de->de_gid = gid; c = 1; } if (vap->va_mode != (mode_t)VNOVAL) { if (ap->a_cred->cr_uid != de->de_uid) { error = priv_check(td, PRIV_VFS_ADMIN); if (error) return (error); } de->de_mode = vap->va_mode; c = 1; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { /* See the comment in ufs_vnops::ufs_setattr(). */ if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) { if (vp->v_type == VCHR) vp->v_rdev->si_atime = vap->va_atime; else de->de_atime = vap->va_atime; } if (vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_type == VCHR) vp->v_rdev->si_mtime = vap->va_mtime; else de->de_mtime = vap->va_mtime; } c = 1; } if (c) { if (vp->v_type == VCHR) vfs_timestamp(&vp->v_rdev->si_ctime); else vfs_timestamp(&de->de_mtime); } return (0); } #ifdef MAC static int devfs_setlabel(struct vop_setlabel_args *ap) { struct vnode *vp; struct devfs_dirent *de; vp = ap->a_vp; de = vp->v_data; mac_vnode_relabel(ap->a_cred, vp, ap->a_label); mac_devfs_update(vp->v_mount, de, vp); return (0); } #endif static int devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td) { return (vnops.fo_stat(fp, sb, cred, td)); } static int devfs_symlink(struct vop_symlink_args *ap) { int i, error; struct devfs_dirent *dd; struct devfs_dirent *de; struct devfs_mount *dmp; struct thread *td; td = ap->a_cnp->cn_thread; KASSERT(td == curthread, ("devfs_symlink: td != curthread")); error = priv_check(td, PRIV_DEVFS_SYMLINK); if (error) return(error); dmp = VFSTODEVFS(ap->a_dvp->v_mount); dd = ap->a_dvp->v_data; de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen); de->de_uid = 0; de->de_gid = 0; de->de_mode = 0755; de->de_inode = alloc_unr(devfs_inos); de->de_dirent->d_type = DT_LNK; i = strlen(ap->a_target) + 1; de->de_symlink = malloc(i, M_DEVFS, M_WAITOK); bcopy(ap->a_target, de->de_symlink, i); sx_xlock(&dmp->dm_lock); #ifdef MAC mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de); #endif TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); return (devfs_allocv(de, ap->a_dvp->v_mount, ap->a_vpp, td)); } static int devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) { return (vnops.fo_truncate(fp, length, cred, td)); } /* ARGSUSED */ static int devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int error, ioflag, resid; struct cdevsw *dsw; error = devfs_fp_check(fp, &dev, &dsw); if (error) return (error); KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; if ((flags & FOF_OFFSET) == 0) uio->uio_offset = fp->f_offset; resid = uio->uio_resid; error = dsw->d_write(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) { vfs_timestamp(&dev->si_ctime); dev->si_mtime = dev->si_ctime; } curthread->td_fpop = NULL; dev_relthread(dev); if ((flags & FOF_OFFSET) == 0) fp->f_offset = uio->uio_offset; fp->f_nextoff = uio->uio_offset; return (error); } dev_t dev2udev(struct cdev *x) { if (x == NULL) return (NODEV); return (cdev2priv(x)->cdp_inode); } static struct fileops devfs_ops_f = { .fo_read = devfs_read_f, .fo_write = devfs_write_f, .fo_truncate = devfs_truncate_f, .fo_ioctl = devfs_ioctl_f, .fo_poll = devfs_poll_f, .fo_kqfilter = devfs_kqfilter_f, .fo_stat = devfs_stat_f, .fo_close = devfs_close_f, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; static struct vop_vector devfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = devfs_access, .vop_getattr = devfs_getattr, .vop_ioctl = devfs_rioctl, .vop_lookup = devfs_lookup, .vop_mknod = devfs_mknod, .vop_pathconf = devfs_pathconf, .vop_read = devfs_rread, .vop_readdir = devfs_readdir, .vop_readlink = devfs_readlink, .vop_reclaim = devfs_reclaim, .vop_remove = devfs_remove, .vop_revoke = devfs_revoke, .vop_setattr = devfs_setattr, #ifdef MAC .vop_setlabel = devfs_setlabel, #endif .vop_symlink = devfs_symlink, }; static struct vop_vector devfs_specops = { .vop_default = &default_vnodeops, .vop_access = devfs_access, .vop_advlock = devfs_advlock, .vop_bmap = VOP_PANIC, .vop_close = devfs_close, .vop_create = VOP_PANIC, .vop_fsync = devfs_fsync, .vop_getattr = devfs_getattr, .vop_lease = VOP_NULL, .vop_link = VOP_PANIC, .vop_mkdir = VOP_PANIC, .vop_mknod = VOP_PANIC, .vop_open = devfs_open, .vop_pathconf = devfs_pathconf, .vop_print = devfs_print, .vop_read = VOP_PANIC, .vop_readdir = VOP_PANIC, .vop_readlink = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_reclaim = devfs_reclaim, .vop_remove = devfs_remove, .vop_rename = VOP_PANIC, .vop_revoke = devfs_revoke, .vop_rmdir = VOP_PANIC, .vop_setattr = devfs_setattr, #ifdef MAC .vop_setlabel = devfs_setlabel, #endif .vop_strategy = VOP_PANIC, .vop_symlink = VOP_PANIC, .vop_write = VOP_PANIC, }; /* * Our calling convention to the device drivers used to be that we passed * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ * flags instead since that's what open(), close() and ioctl() takes and * we don't really want vnode.h in device drivers. * We solved the source compatibility by redefining some vnode flags to * be the same as the fcntl ones and by sending down the bitwise OR of * the respective fcntl/vnode flags. These CTASSERTS make sure nobody * pulls the rug out under this. */ CTASSERT(O_NONBLOCK == IO_NDELAY); CTASSERT(O_FSYNC == IO_SYNC); Index: head/sys/fs/fdescfs/fdesc_vnops.c =================================================================== --- head/sys/fs/fdescfs/fdesc_vnops.c (revision 183214) +++ head/sys/fs/fdescfs/fdesc_vnops.c (revision 183215) @@ -1,624 +1,624 @@ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94 * * $FreeBSD$ */ /* * /dev/fd Filesystem */ #include #include #include #include #include #include /* boottime */ #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #define NFDCACHE 4 #define FD_NHASH(ix) \ (&fdhashtbl[(ix) & fdhash]) static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl; static u_long fdhash; struct mtx fdesc_hashmtx; static vop_getattr_t fdesc_getattr; static vop_lookup_t fdesc_lookup; static vop_open_t fdesc_open; static vop_readdir_t fdesc_readdir; static vop_reclaim_t fdesc_reclaim; static vop_setattr_t fdesc_setattr; static struct vop_vector fdesc_vnodeops = { .vop_default = &default_vnodeops, .vop_access = VOP_NULL, .vop_getattr = fdesc_getattr, .vop_lookup = fdesc_lookup, .vop_open = fdesc_open, .vop_pathconf = vop_stdpathconf, .vop_readdir = fdesc_readdir, .vop_reclaim = fdesc_reclaim, .vop_setattr = fdesc_setattr, }; static void fdesc_insmntque_dtr(struct vnode *, void *); static void fdesc_remove_entry(struct fdescnode *); /* * Initialise cache headers */ int fdesc_init(vfsp) struct vfsconf *vfsp; { mtx_init(&fdesc_hashmtx, "fdescfs_hash", NULL, MTX_DEF); fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash); return (0); } /* * Uninit ready for unload. */ int fdesc_uninit(vfsp) struct vfsconf *vfsp; { hashdestroy(fdhashtbl, M_CACHE, fdhash); mtx_destroy(&fdesc_hashmtx); return (0); } /* * If allocating vnode fails, call this. */ static void fdesc_insmntque_dtr(struct vnode *vp, void *arg) { vgone(vp); vput(vp); } /* * Remove an entry from the hash if it exists. */ static void fdesc_remove_entry(struct fdescnode *fd) { struct fdhashhead *fc; struct fdescnode *fd2; fc = FD_NHASH(fd->fd_ix); mtx_lock(&fdesc_hashmtx); LIST_FOREACH(fd2, fc, fd_hash) { if (fd == fd2) { LIST_REMOVE(fd, fd_hash); break; } } mtx_unlock(&fdesc_hashmtx); } int fdesc_allocvp(ftype, fd_fd, ix, mp, vpp, td) fdntype ftype; unsigned fd_fd; int ix; struct mount *mp; struct vnode **vpp; struct thread *td; { struct fdescmount *fmp; struct fdhashhead *fc; struct fdescnode *fd, *fd2; struct vnode *vp, *vp2; int error = 0; fc = FD_NHASH(ix); loop: mtx_lock(&fdesc_hashmtx); /* * If a forced unmount is progressing, we need to drop it. The flags are * protected by the hashmtx. */ fmp = (struct fdescmount *)mp->mnt_data; if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { mtx_unlock(&fdesc_hashmtx); return (-1); } LIST_FOREACH(fd, fc, fd_hash) { if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) { /* Get reference to vnode in case it's being free'd */ vp = fd->fd_vnode; VI_LOCK(vp); mtx_unlock(&fdesc_hashmtx); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) goto loop; *vpp = vp; return (0); } } mtx_unlock(&fdesc_hashmtx); MALLOC(fd, struct fdescnode *, sizeof(struct fdescnode), M_TEMP, M_WAITOK); error = getnewvnode("fdescfs", mp, &fdesc_vnodeops, &vp); if (error) { FREE(fd, M_TEMP); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vp->v_data = fd; fd->fd_vnode = vp; fd->fd_type = ftype; fd->fd_fd = fd_fd; fd->fd_ix = ix; error = insmntque1(vp, mp, fdesc_insmntque_dtr, NULL); if (error != 0) { *vpp = NULLVP; return (error); } /* Make sure that someone didn't beat us when inserting the vnode. */ mtx_lock(&fdesc_hashmtx); /* * If a forced unmount is progressing, we need to drop it. The flags are * protected by the hashmtx. */ fmp = (struct fdescmount *)mp->mnt_data; if (fmp == NULL || fmp->flags & FMNT_UNMOUNTF) { mtx_unlock(&fdesc_hashmtx); vgone(vp); vput(vp); *vpp = NULLVP; return (-1); } LIST_FOREACH(fd2, fc, fd_hash) { if (fd2->fd_ix == ix && fd2->fd_vnode->v_mount == mp) { /* Get reference to vnode in case it's being free'd */ vp2 = fd2->fd_vnode; VI_LOCK(vp2); mtx_unlock(&fdesc_hashmtx); error = vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK, td); /* Someone beat us, dec use count and wait for reclaim */ vgone(vp); vput(vp); /* If we didn't get it, return no vnode. */ if (error) vp2 = NULLVP; *vpp = vp2; return (error); } } /* If we came here, we can insert it safely. */ LIST_INSERT_HEAD(fc, fd, fd_hash); mtx_unlock(&fdesc_hashmtx); *vpp = vp; return (0); } /* * vp is the current namei directory * ndp is the name to locate in that directory... */ static int fdesc_lookup(ap) struct vop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; } */ *ap; { struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; char *pname = cnp->cn_nameptr; struct thread *td = cnp->cn_thread; struct file *fp; int nlen = cnp->cn_namelen; u_int fd; int error; struct vnode *fvp; if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad; } if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; VREF(dvp); return (0); } if (VTOFDESC(dvp)->fd_type != Froot) { error = ENOTDIR; goto bad; } fd = 0; /* the only time a leading 0 is acceptable is if it's "0" */ if (*pname == '0' && nlen != 1) { error = ENOENT; goto bad; } while (nlen--) { if (*pname < '0' || *pname > '9') { error = ENOENT; goto bad; } fd = 10 * fd + *pname++ - '0'; } if ((error = fget(td, fd, &fp)) != 0) goto bad; /* Check if we're looking up ourselves. */ if (VTOFDESC(dvp)->fd_ix == FD_DESC + fd) { /* * In case we're holding the last reference to the file, the dvp * will be re-acquired. */ vhold(dvp); VOP_UNLOCK(dvp, 0); fdrop(fp, td); /* Re-aquire the lock afterwards. */ vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); vdrop(dvp); fvp = dvp; } else { /* * Unlock our root node (dvp) when doing this, since we might * deadlock since the vnode might be locked by another thread * and the root vnode lock will be obtained afterwards (in case * we're looking up the fd of the root vnode), which will be the * opposite lock order. Vhold the root vnode first so we don't * loose it. */ vhold(dvp); VOP_UNLOCK(dvp, 0); error = fdesc_allocvp(Fdesc, fd, FD_DESC + fd, dvp->v_mount, &fvp, td); fdrop(fp, td); /* * The root vnode must be locked last to prevent deadlock condition. */ vn_lock(dvp, LK_RETRY | LK_EXCLUSIVE); vdrop(dvp); } if (error) goto bad; *vpp = fvp; return (0); bad: *vpp = NULL; return (error); } static int fdesc_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; if (VTOFDESC(vp)->fd_type == Froot) return (0); /* * XXX Kludge: set td->td_proc->p_dupfd to contain the value of the the file * descriptor being sought for duplication. The error return ensures * that the vnode for this device will be released by vn_open. Open * will detect this special error and take the actions in dupfdopen. * Other callers of vn_open or VOP_OPEN will simply report the * error. */ ap->a_td->td_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ return (ENODEV); } static int fdesc_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct thread *td = curthread; struct file *fp; struct stat stb; u_int fd; int error = 0; switch (VTOFDESC(vp)->fd_type) { case Froot: - VATTR_NULL(vap); - vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; vap->va_type = VDIR; vap->va_nlink = 2; vap->va_size = DEV_BSIZE; vap->va_fileid = VTOFDESC(vp)->fd_ix; vap->va_uid = 0; vap->va_gid = 0; vap->va_blocksize = DEV_BSIZE; vap->va_atime.tv_sec = boottime.tv_sec; vap->va_atime.tv_nsec = 0; vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_mtime; vap->va_gen = 0; vap->va_flags = 0; vap->va_rdev = NODEV; vap->va_bytes = 0; + vap->va_filerev = 0; break; case Fdesc: fd = VTOFDESC(vp)->fd_fd; if ((error = fget(td, fd, &fp)) != 0) return (error); bzero(&stb, sizeof(stb)); error = fo_stat(fp, &stb, td->td_ucred, td); fdrop(fp, td); if (error == 0) { - VATTR_NULL(vap); vap->va_type = IFTOVT(stb.st_mode); vap->va_mode = stb.st_mode; #define FDRX (VREAD|VEXEC) if (vap->va_type == VDIR) vap->va_mode &= ~((FDRX)|(FDRX>>3)|(FDRX>>6)); #undef FDRX vap->va_nlink = 1; vap->va_flags = 0; vap->va_bytes = stb.st_blocks * stb.st_blksize; vap->va_fileid = VTOFDESC(vp)->fd_ix; vap->va_size = stb.st_size; vap->va_blocksize = stb.st_blksize; vap->va_rdev = stb.st_rdev; /* * If no time data is provided, use the current time. */ if (stb.st_atimespec.tv_sec == 0 && stb.st_atimespec.tv_nsec == 0) nanotime(&stb.st_atimespec); if (stb.st_ctimespec.tv_sec == 0 && stb.st_ctimespec.tv_nsec == 0) nanotime(&stb.st_ctimespec); if (stb.st_mtimespec.tv_sec == 0 && stb.st_mtimespec.tv_nsec == 0) nanotime(&stb.st_mtimespec); vap->va_atime = stb.st_atimespec; vap->va_mtime = stb.st_mtimespec; vap->va_ctime = stb.st_ctimespec; vap->va_uid = stb.st_uid; vap->va_gid = stb.st_gid; + vap->va_gen = 0; + vap->va_filerev = 0; } break; default: panic("fdesc_getattr"); break; } if (error == 0) vp->v_type = vap->va_type; return (error); } static int fdesc_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp; struct mount *mp; struct file *fp; struct thread *td = curthread; unsigned fd; int error; /* * Can't mess with the root vnode */ if (VTOFDESC(ap->a_vp)->fd_type == Froot) return (EACCES); fd = VTOFDESC(ap->a_vp)->fd_fd; /* * Allow setattr where there is an underlying vnode. */ error = getvnode(td->td_proc->p_fd, fd, &fp); if (error) { /* * getvnode() returns EINVAL if the file descriptor is not * backed by a vnode. Silently drop all changes except * chflags(2) in this case. */ if (error == EINVAL) { if (vap->va_flags != VNOVAL) error = EOPNOTSUPP; else error = 0; } return (error); } vp = fp->f_vnode; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); } fdrop(fp, td); return (error); } #define UIO_MX 16 static int fdesc_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; u_long *a_cookies; int a_ncookies; } */ *ap; { struct uio *uio = ap->a_uio; struct filedesc *fdp; struct dirent d; struct dirent *dp = &d; int error, i, off, fcnt; /* * We don't allow exporting fdesc mounts, and currently local * requests do not need cookies. */ if (ap->a_ncookies) panic("fdesc_readdir: not hungry"); if (VTOFDESC(ap->a_vp)->fd_type != Froot) panic("fdesc_readdir: not dir"); off = (int)uio->uio_offset; if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || uio->uio_resid < UIO_MX) return (EINVAL); i = (u_int)off / UIO_MX; fdp = uio->uio_td->td_proc->p_fd; error = 0; fcnt = i - 2; /* The first two nodes are `.' and `..' */ FILEDESC_SLOCK(fdp); while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) { switch (i) { case 0: /* `.' */ case 1: /* `..' */ bzero((caddr_t)dp, UIO_MX); dp->d_fileno = i + FD_ROOT; dp->d_namlen = i + 1; dp->d_reclen = UIO_MX; bcopy("..", dp->d_name, dp->d_namlen); dp->d_name[i + 1] = '\0'; dp->d_type = DT_DIR; break; default: if (fdp->fd_ofiles[fcnt] == NULL) { FILEDESC_SUNLOCK(fdp); goto done; } bzero((caddr_t) dp, UIO_MX); dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_reclen = UIO_MX; dp->d_type = DT_UNKNOWN; dp->d_fileno = i + FD_DESC; break; } /* * And ship to userland */ FILEDESC_SUNLOCK(fdp); error = uiomove(dp, UIO_MX, uio); if (error) goto done; FILEDESC_SLOCK(fdp); i++; fcnt++; } FILEDESC_SUNLOCK(fdp); done: uio->uio_offset = i * UIO_MX; return (error); } static int fdesc_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp; struct fdescnode *fd; vp = ap->a_vp; fd = VTOFDESC(vp); fdesc_remove_entry(fd); FREE(vp->v_data, M_TEMP); vp->v_data = NULL; return (0); } Index: head/sys/fs/portalfs/portal_vnops.c =================================================================== --- head/sys/fs/portalfs/portal_vnops.c (revision 183214) +++ head/sys/fs/portalfs/portal_vnops.c (revision 183215) @@ -1,564 +1,563 @@ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)portal_vnops.c 8.14 (Berkeley) 5/21/95 * * $FreeBSD$ */ /* * Portal Filesystem */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int portal_fileid = PORTAL_ROOTFILEID+1; static void portal_closefd(struct thread *td, int fd); static int portal_connect(struct socket *so, struct socket *so2); static vop_getattr_t portal_getattr; static vop_lookup_t portal_lookup; static vop_open_t portal_open; static vop_readdir_t portal_readdir; static vop_reclaim_t portal_reclaim; static vop_setattr_t portal_setattr; static void portal_closefd(td, fd) struct thread *td; int fd; { int error; error = kern_close(td, fd); /* * We should never get an error, and there isn't anything * we could do if we got one, so just print a message. */ if (error) printf("portal_closefd: error = %d\n", error); } /* * vp is the current namei directory * cnp is the name to locate in that directory... */ static int portal_lookup(ap) struct vop_lookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct vnode **vpp = ap->a_vpp; struct vnode *dvp = ap->a_dvp; char *pname = cnp->cn_nameptr; struct portalnode *pt; int error; struct vnode *fvp = 0; char *path; int size; *vpp = NULLVP; if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) return (EROFS); if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; VREF(dvp); return (0); } KASSERT((cnp->cn_flags & ISDOTDOT) == 0, ("portal_lookup: Can not handle dotdot lookups.")); /* * Do the MALLOC before the getnewvnode since doing so afterward * might cause a bogus v_data pointer to get dereferenced * elsewhere if MALLOC should block. */ MALLOC(pt, struct portalnode *, sizeof(struct portalnode), M_TEMP, M_WAITOK); error = getnewvnode("portal", dvp->v_mount, &portal_vnodeops, &fvp); if (error) { FREE(pt, M_TEMP); goto bad; } fvp->v_type = VREG; fvp->v_data = pt; /* * Save all of the remaining pathname and * advance the namei next pointer to the end * of the string. */ for (size = 0, path = pname; *path; path++) size++; cnp->cn_consume = size - cnp->cn_namelen; pt->pt_arg = malloc(size+1, M_TEMP, M_WAITOK); pt->pt_size = size+1; bcopy(pname, pt->pt_arg, pt->pt_size); pt->pt_fileid = portal_fileid++; *vpp = fvp; vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); error = insmntque(fvp, dvp->v_mount); if (error != 0) { *vpp = NULLVP; return (error); } return (0); bad:; if (fvp) vrele(fvp); return (error); } static int portal_connect(so, so2) struct socket *so; struct socket *so2; { /* from unp_connect, bypassing the namei stuff... */ struct socket *so3; struct unpcb *unp2; struct unpcb *unp3; if (so2 == 0) return (ECONNREFUSED); if (so->so_type != so2->so_type) return (EPROTOTYPE); if ((so2->so_options & SO_ACCEPTCONN) == 0) return (ECONNREFUSED); if ((so3 = sonewconn(so2, 0)) == 0) return (ECONNREFUSED); unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); if (unp2->unp_addr) unp3->unp_addr = (struct sockaddr_un *) sodupsockaddr((struct sockaddr *)unp2->unp_addr, M_NOWAIT); so2 = so3; return (uipc_connect2(so, so2)); } static int portal_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct socket *so = 0; struct portalnode *pt; struct thread *td = ap->a_td; struct vnode *vp = ap->a_vp; struct uio auio; struct iovec aiov[2]; int res; struct mbuf *cm = 0; struct cmsghdr *cmsg; int newfds; int *ip; int fd; int error; int len; struct portalmount *fmp; struct file *fp; struct portal_cred pcred; /* * Nothing to do when opening the root node. */ if (vp->v_vflag & VV_ROOT) return (0); /* * Can't be opened unless the caller is set up * to deal with the side effects. Check for this * by testing whether td_dupfd has been set. */ if (td->td_dupfd >= 0) return (ENODEV); pt = VTOPORTAL(vp); fmp = VFSTOPORTAL(vp->v_mount); /* * Create a new socket. */ error = socreate(AF_UNIX, &so, SOCK_STREAM, 0, ap->a_td->td_ucred, ap->a_td); if (error) goto bad; /* * Reserve some buffer space */ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */ error = soreserve(so, res, res); if (error) goto bad; /* * Kick off connection */ error = portal_connect(so, fmp->pm_server->f_data); if (error) goto bad; /* * Wait for connection to complete */ /* * XXX: Since the mount point is holding a reference on the * underlying server socket, it is not easy to find out whether * the server process is still running. To handle this problem * we loop waiting for the new socket to be connected (something * which will only happen if the server is still running) or for * the reference count on the server socket to drop to 1, which * will happen if the server dies. Sleep for 5 second intervals * and keep polling the reference count. XXX. */ /* XXXRW: Locking? */ SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { if (fmp->pm_server->f_count == 1) { SOCK_UNLOCK(so); error = ECONNREFUSED; goto bad; } (void) msleep((caddr_t) &so->so_timeo, SOCK_MTX(so), PSOCK, "portalcon", 5 * hz); } SOCK_UNLOCK(so); if (so->so_error) { error = so->so_error; goto bad; } /* * Set miscellaneous flags */ SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_timeo = 0; so->so_rcv.sb_flags |= SB_NOINTR; SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_LOCK(&so->so_snd); so->so_snd.sb_timeo = 0; so->so_snd.sb_flags |= SB_NOINTR; SOCKBUF_UNLOCK(&so->so_snd); pcred.pcr_flag = ap->a_mode; pcred.pcr_uid = ap->a_cred->cr_uid; pcred.pcr_ngroups = ap->a_cred->cr_ngroups; bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t)); aiov[0].iov_base = (caddr_t) &pcred; aiov[0].iov_len = sizeof(pcred); aiov[1].iov_base = pt->pt_arg; aiov[1].iov_len = pt->pt_size; auio.uio_iov = aiov; auio.uio_iovcnt = 2; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = td; auio.uio_offset = 0; auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len; error = sosend(so, (struct sockaddr *) 0, &auio, (struct mbuf *) 0, (struct mbuf *) 0, 0 , td); if (error) goto bad; len = auio.uio_resid = sizeof(int); do { struct mbuf *m = 0; int flags = MSG_WAITALL; error = soreceive(so, (struct sockaddr **) 0, &auio, &m, &cm, &flags); if (error) goto bad; /* * Grab an error code from the mbuf. */ if (m) { m = m_pullup(m, sizeof(int)); /* Needed? */ if (m) { error = *(mtod(m, int *)); m_freem(m); } else { error = EINVAL; } } else { if (cm == 0) { error = ECONNRESET; /* XXX */ #ifdef notdef break; #endif } } } while (cm == 0 && auio.uio_resid == len && !error); if (cm == 0) goto bad; if (auio.uio_resid) { error = 0; #ifdef notdef error = EMSGSIZE; goto bad; #endif } /* * XXX: Break apart the control message, and retrieve the * received file descriptor. Note that more than one descriptor * may have been received, or that the rights chain may have more * than a single mbuf in it. What to do? */ cmsg = mtod(cm, struct cmsghdr *); newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int); if (newfds == 0) { error = ECONNREFUSED; goto bad; } /* * At this point the rights message consists of a control message * header, followed by a data region containing a vector of * integer file descriptors. The fds were allocated by the action * of receiving the control message. */ ip = (int *) (cmsg + 1); fd = *ip++; if (newfds > 1) { /* * Close extra fds. */ int i; printf("portal_open: %d extra fds\n", newfds - 1); for (i = 1; i < newfds; i++) { portal_closefd(td, *ip); ip++; } } /* * Check that the mode the file is being opened for is a subset * of the mode of the existing descriptor. */ if ((error = fget(td, fd, &fp)) != 0) goto bad; if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { fdrop(fp, td); portal_closefd(td, fd); error = EACCES; goto bad; } fdrop(fp, td); /* * Save the dup fd in the proc structure then return the * special error code (ENXIO) which causes magic things to * happen in vn_open. The whole concept is, well, hmmm. */ td->td_dupfd = fd; error = ENXIO; bad:; /* * And discard the control message. */ if (cm) { m_freem(cm); } if (so) { soshutdown(so, 2); soclose(so); } return (error); } static int portal_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; - bzero(vap, sizeof(*vap)); - vattr_null(vap); vap->va_uid = 0; vap->va_gid = 0; vap->va_size = DEV_BSIZE; vap->va_blocksize = DEV_BSIZE; nanotime(&vap->va_atime); vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_mtime; vap->va_gen = 0; vap->va_flags = 0; vap->va_rdev = NODEV; /* vap->va_qbytes = 0; */ vap->va_bytes = 0; + vap->va_filerev = 0; /* vap->va_qsize = 0; */ if (vp->v_vflag & VV_ROOT) { vap->va_type = VDIR; vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR| S_IRGRP|S_IWGRP|S_IXGRP| S_IROTH|S_IWOTH|S_IXOTH; vap->va_nlink = 2; vap->va_fileid = 2; } else { vap->va_type = VREG; vap->va_mode = S_IRUSR|S_IWUSR| S_IRGRP|S_IWGRP| S_IROTH|S_IWOTH; vap->va_nlink = 1; vap->va_fileid = VTOPORTAL(vp)->pt_fileid; } return (0); } static int portal_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { /* * Can't mess with the root vnode */ if (ap->a_vp->v_vflag & VV_ROOT) return (EACCES); if (ap->a_vap->va_flags != VNOVAL) return (EOPNOTSUPP); return (0); } /* * Fake readdir, just return empty directory. * It is hard to deal with '.' and '..' so don't bother. */ static int portal_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; u_long *a_cookies; int a_ncookies; } */ *ap; { /* * We don't allow exporting portal mounts, and currently local * requests do not need cookies. */ if (ap->a_ncookies) panic("portal_readdir: not hungry"); return (0); } static int portal_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct portalnode *pt = VTOPORTAL(ap->a_vp); if (pt->pt_arg) { free((caddr_t) pt->pt_arg, M_TEMP); pt->pt_arg = 0; } FREE(ap->a_vp->v_data, M_TEMP); ap->a_vp->v_data = 0; return (0); } struct vop_vector portal_vnodeops = { .vop_default = &default_vnodeops, .vop_access = VOP_NULL, .vop_getattr = portal_getattr, .vop_lookup = portal_lookup, .vop_open = portal_open, .vop_pathconf = vop_stdpathconf, .vop_readdir = portal_readdir, .vop_reclaim = portal_reclaim, .vop_setattr = portal_setattr, }; Index: head/sys/fs/pseudofs/pseudofs_vnops.c =================================================================== --- head/sys/fs/pseudofs/pseudofs_vnops.c (revision 183214) +++ head/sys/fs/pseudofs/pseudofs_vnops.c (revision 183215) @@ -1,892 +1,892 @@ /*- * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_pseudofs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Returns the fileno, adjusted for target pid */ static uint32_t pn_fileno(struct pfs_node *pn, pid_t pid) { KASSERT(pn->pn_fileno > 0, ("%s(): no fileno allocated", __func__)); if (pid != NO_PID) return (pn->pn_fileno * NO_PID + pid); return (pn->pn_fileno); } /* * Returns non-zero if given file is visible to given thread. */ static int pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc) { int visible; if (proc == NULL) return (0); PROC_LOCK_ASSERT(proc, MA_OWNED); visible = ((proc->p_flag & P_WEXIT) == 0); if (visible) visible = (p_cansee(td, proc) == 0); if (visible && pn->pn_vis != NULL) visible = pn_vis(td, proc, pn); if (!visible) return (0); return (1); } static int pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid, struct proc **p) { struct proc *proc; PFS_TRACE(("%s (pid: %d, req: %d)", pn->pn_name, pid, td->td_proc->p_pid)); if (p) *p = NULL; if (pid == NO_PID) PFS_RETURN (1); if ((proc = pfind(pid)) == NULL) PFS_RETURN (0); if (pfs_visible_proc(td, pn, proc)) { if (p) *p = proc; else PROC_UNLOCK(proc); PFS_RETURN (1); } PROC_UNLOCK(proc); PFS_RETURN (0); } /* * Verify permissions */ static int pfs_access(struct vop_access_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct vattr vattr; int error; PFS_TRACE(("%s", pvd->pvd_pn->pn_name)); (void)pvd; error = VOP_GETATTR(vn, &vattr, va->a_cred); if (error) PFS_RETURN (error); error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid, vattr.va_gid, va->a_mode, va->a_cred, NULL); PFS_RETURN (error); } /* * Close a file or directory */ static int pfs_close(struct vop_close_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct proc *proc; int error; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); /* * Do nothing unless this is the last close and the node has a * last-close handler. */ if (vrefcnt(vn) > 1 || pn->pn_close == NULL) PFS_RETURN (0); if (pvd->pvd_pid != NO_PID) { proc = pfind(pvd->pvd_pid); } else { proc = NULL; } error = pn_close(va->a_td, proc, pn); if (proc != NULL) PROC_UNLOCK(proc); PFS_RETURN (error); } /* * Get file attributes */ static int pfs_getattr(struct vop_getattr_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct vattr *vap = va->a_vap; struct proc *proc; int error = 0; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) PFS_RETURN (ENOENT); - VATTR_NULL(vap); vap->va_type = vn->v_type; vap->va_fileid = pn_fileno(pn, pvd->pvd_pid); vap->va_flags = 0; vap->va_blocksize = PAGE_SIZE; vap->va_bytes = vap->va_size = 0; + vap->va_filerev = 0; vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0]; vap->va_nlink = 1; nanotime(&vap->va_ctime); vap->va_atime = vap->va_mtime = vap->va_ctime; switch (pn->pn_type) { case pfstype_procdir: case pfstype_root: case pfstype_dir: #if 0 pfs_lock(pn); /* compute link count */ pfs_unlock(pn); #endif vap->va_mode = 0555; break; case pfstype_file: case pfstype_symlink: vap->va_mode = 0444; break; default: printf("shouldn't be here!\n"); vap->va_mode = 0; break; } if (proc != NULL) { vap->va_uid = proc->p_ucred->cr_ruid; vap->va_gid = proc->p_ucred->cr_rgid; if (pn->pn_attr != NULL) error = pn_attr(curthread, proc, pn, vap); PROC_UNLOCK(proc); } else { vap->va_uid = 0; vap->va_gid = 0; } PFS_RETURN (error); } /* * Perform an ioctl */ static int pfs_ioctl(struct vop_ioctl_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct proc *proc; int error; PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command)); pfs_assert_not_owned(pn); if (vn->v_type != VREG) PFS_RETURN (EINVAL); if (pn->pn_ioctl == NULL) PFS_RETURN (ENOTTY); /* * This is necessary because process' privileges may * have changed since the open() call. */ if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data); if (proc != NULL) PROC_UNLOCK(proc); PFS_RETURN (error); } /* * Perform getextattr */ static int pfs_getextattr(struct vop_getextattr_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct proc *proc; int error; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); /* * This is necessary because either process' privileges may * have changed since the open() call. */ if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); if (pn->pn_getextattr == NULL) error = EOPNOTSUPP; else error = pn_getextattr(curthread, proc, pn, va->a_attrnamespace, va->a_name, va->a_uio, va->a_size, va->a_cred); if (proc != NULL) PROC_UNLOCK(proc); pfs_unlock(pn); PFS_RETURN (error); } /* * Look up a file or directory */ static int pfs_lookup(struct vop_cachedlookup_args *va) { struct vnode *vn = va->a_dvp; struct vnode **vpp = va->a_vpp; struct componentname *cnp = va->a_cnp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pd = pvd->pvd_pn; struct pfs_node *pn, *pdn = NULL; pid_t pid = pvd->pvd_pid; char *pname; int error, i, namelen, visible; PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr)); pfs_assert_not_owned(pd); if (vn->v_type != VDIR) PFS_RETURN (ENOTDIR); error = VOP_ACCESS(vn, VEXEC, cnp->cn_cred, cnp->cn_thread); if (error) PFS_RETURN (error); /* * Don't support DELETE or RENAME. CREATE is supported so * that O_CREAT will work, but the lookup will still fail if * the file does not exist. */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) PFS_RETURN (EOPNOTSUPP); /* shortcut: check if the name is too long */ if (cnp->cn_namelen >= PFS_NAMELEN) PFS_RETURN (ENOENT); /* check that parent directory is visible... */ if (!pfs_visible(curthread, pd, pvd->pvd_pid, NULL)) PFS_RETURN (ENOENT); /* self */ namelen = cnp->cn_namelen; pname = cnp->cn_nameptr; if (namelen == 1 && pname[0] == '.') { pn = pd; *vpp = vn; VREF(vn); PFS_RETURN (0); } /* parent */ if (cnp->cn_flags & ISDOTDOT) { if (pd->pn_type == pfstype_root) PFS_RETURN (EIO); VOP_UNLOCK(vn, 0); KASSERT(pd->pn_parent != NULL, ("%s(): non-root directory has no parent", __func__)); /* * This one is tricky. Descendents of procdir nodes * inherit their parent's process affinity, but * there's no easy reverse mapping. For simplicity, * we assume that if this node is a procdir, its * parent isn't (which is correct as long as * descendents of procdir nodes are never procdir * nodes themselves) */ if (pd->pn_type == pfstype_procdir) pid = NO_PID; pfs_lock(pd); pn = pd->pn_parent; pfs_unlock(pd); goto got_pnode; } pfs_lock(pd); /* named node */ for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next) if (pn->pn_type == pfstype_procdir) pdn = pn; else if (pn->pn_name[namelen] == '\0' && bcmp(pname, pn->pn_name, namelen) == 0) { pfs_unlock(pd); goto got_pnode; } /* process dependent node */ if ((pn = pdn) != NULL) { pid = 0; for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i) if ((pid = pid * 10 + pname[i] - '0') > PID_MAX) break; if (i == cnp->cn_namelen) { pfs_unlock(pd); goto got_pnode; } } pfs_unlock(pd); PFS_RETURN (ENOENT); got_pnode: pfs_assert_not_owned(pd); pfs_assert_not_owned(pn); visible = pfs_visible(curthread, pn, pid, NULL); if (!visible) { error = ENOENT; goto failed; } error = pfs_vncache_alloc(vn->v_mount, vpp, pn, pid); if (error) goto failed; if (cnp->cn_flags & ISDOTDOT) vn_lock(vn, LK_EXCLUSIVE|LK_RETRY); if (cnp->cn_flags & MAKEENTRY) cache_enter(vn, *vpp, cnp); PFS_RETURN (0); failed: if (cnp->cn_flags & ISDOTDOT) vn_lock(vn, LK_EXCLUSIVE|LK_RETRY); PFS_RETURN(error); } /* * Open a file or directory. */ static int pfs_open(struct vop_open_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; int mode = va->a_mode; PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode)); pfs_assert_not_owned(pn); /* check if the requested mode is permitted */ if (((mode & FREAD) && !(mode & PFS_RD)) || ((mode & FWRITE) && !(mode & PFS_WR))) PFS_RETURN (EPERM); /* we don't support locking */ if ((mode & O_SHLOCK) || (mode & O_EXLOCK)) PFS_RETURN (EOPNOTSUPP); PFS_RETURN (0); } /* * Read from a file */ static int pfs_read(struct vop_read_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; struct sbuf *sb = NULL; int error; unsigned int buflen, offset, resid; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VREG) PFS_RETURN (EINVAL); if (!(pn->pn_flags & PFS_RD)) PFS_RETURN (EBADF); if (pn->pn_fill == NULL) PFS_RETURN (EIO); /* * This is necessary because either process' privileges may * have changed since the open() call. */ if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); PROC_UNLOCK(proc); } if (pn->pn_flags & PFS_RAWRD) { PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid)); error = pn_fill(curthread, proc, pn, NULL, uio); PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid)); if (proc != NULL) PRELE(proc); PFS_RETURN (error); } /* beaucoup sanity checks so we don't ask for bogus allocation */ if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset || (resid = uio->uio_resid) != uio->uio_resid || (buflen = offset + resid + 1) < offset || buflen > INT_MAX) { if (proc != NULL) PRELE(proc); PFS_RETURN (EINVAL); } if (buflen > MAXPHYS + 1) { if (proc != NULL) PRELE(proc); PFS_RETURN (EIO); } sb = sbuf_new(sb, NULL, buflen, 0); if (sb == NULL) { if (proc != NULL) PRELE(proc); PFS_RETURN (EIO); } error = pn_fill(curthread, proc, pn, sb, uio); if (proc != NULL) PRELE(proc); if (error) { sbuf_delete(sb); PFS_RETURN (error); } sbuf_finish(sb); error = uiomove_frombuf(sbuf_data(sb), sbuf_len(sb), uio); sbuf_delete(sb); PFS_RETURN (error); } /* * Iterate through directory entries */ static int pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd, struct pfs_node **pn, struct proc **p) { int visible; sx_assert(&allproc_lock, SX_SLOCKED); pfs_assert_owned(pd); again: if (*pn == NULL) { /* first node */ *pn = pd->pn_nodes; } else if ((*pn)->pn_type != pfstype_procdir) { /* next node */ *pn = (*pn)->pn_next; } if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) { /* next process */ if (*p == NULL) *p = LIST_FIRST(&allproc); else *p = LIST_NEXT(*p, p_list); /* out of processes: next node */ if (*p == NULL) *pn = (*pn)->pn_next; else PROC_LOCK(*p); } if ((*pn) == NULL) return (-1); if (*p != NULL) { visible = pfs_visible_proc(td, *pn, *p); PROC_UNLOCK(*p); } else if (proc != NULL) { visible = pfs_visible_proc(td, *pn, proc); } else { visible = 1; } if (!visible) goto again; return (0); } /* * Return directory entries. */ static int pfs_readdir(struct vop_readdir_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pd = pvd->pvd_pn; pid_t pid = pvd->pvd_pid; struct proc *p, *proc; struct pfs_node *pn; struct dirent *entry; struct uio *uio; off_t offset; int error, i, resid; char *buf, *ent; KASSERT(pd->pn_info == vn->v_mount->mnt_data, ("%s(): pn_info does not match mountpoint", __func__)); PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid)); pfs_assert_not_owned(pd); if (vn->v_type != VDIR) PFS_RETURN (ENOTDIR); uio = va->a_uio; /* only allow reading entire entries */ offset = uio->uio_offset; resid = uio->uio_resid; if (offset < 0 || offset % PFS_DELEN != 0 || (resid && resid < PFS_DELEN)) PFS_RETURN (EINVAL); if (resid == 0) PFS_RETURN (0); /* can't do this while holding the proc lock... */ buf = malloc(resid, M_IOV, M_WAITOK | M_ZERO); sx_slock(&allproc_lock); pfs_lock(pd); /* check if the directory is visible to the caller */ if (!pfs_visible(curthread, pd, pid, &proc)) { sx_sunlock(&allproc_lock); pfs_unlock(pd); free(buf, M_IOV); PFS_RETURN (ENOENT); } KASSERT(pid == NO_PID || proc != NULL, ("%s(): no process for pid %lu", __func__, (unsigned long)pid)); /* skip unwanted entries */ for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) { if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) { /* nothing left... */ if (proc != NULL) PROC_UNLOCK(proc); pfs_unlock(pd); sx_sunlock(&allproc_lock); free(buf, M_IOV); PFS_RETURN (0); } } /* fill in entries */ ent = buf; while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 && resid >= PFS_DELEN) { entry = (struct dirent *)ent; entry->d_reclen = PFS_DELEN; entry->d_fileno = pn_fileno(pn, pid); /* PFS_DELEN was picked to fit PFS_NAMLEN */ for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i) entry->d_name[i] = pn->pn_name[i]; entry->d_name[i] = 0; entry->d_namlen = i; switch (pn->pn_type) { case pfstype_procdir: KASSERT(p != NULL, ("reached procdir node with p == NULL")); entry->d_namlen = snprintf(entry->d_name, PFS_NAMELEN, "%d", p->p_pid); /* fall through */ case pfstype_root: case pfstype_dir: case pfstype_this: case pfstype_parent: entry->d_type = DT_DIR; break; case pfstype_file: entry->d_type = DT_REG; break; case pfstype_symlink: entry->d_type = DT_LNK; break; default: panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type); } PFS_TRACE(("%s", entry->d_name)); offset += PFS_DELEN; resid -= PFS_DELEN; ent += PFS_DELEN; } if (proc != NULL) PROC_UNLOCK(proc); pfs_unlock(pd); sx_sunlock(&allproc_lock); PFS_TRACE(("%zd bytes", ent - buf)); error = uiomove(buf, ent - buf, uio); free(buf, M_IOV); PFS_RETURN (error); } /* * Read a symbolic link */ static int pfs_readlink(struct vop_readlink_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc = NULL; char buf[PATH_MAX]; struct sbuf sb; int error; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VLNK) PFS_RETURN (EINVAL); if (pn->pn_fill == NULL) PFS_RETURN (EIO); if (pvd->pvd_pid != NO_PID) { if ((proc = pfind(pvd->pvd_pid)) == NULL) PFS_RETURN (EIO); if (proc->p_flag & P_WEXIT) { PROC_UNLOCK(proc); PFS_RETURN (EIO); } _PHOLD(proc); PROC_UNLOCK(proc); } /* sbuf_new() can't fail with a static buffer */ sbuf_new(&sb, buf, sizeof buf, 0); error = pn_fill(curthread, proc, pn, &sb, NULL); if (proc != NULL) PRELE(proc); if (error) { sbuf_delete(&sb); PFS_RETURN (error); } sbuf_finish(&sb); error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio); sbuf_delete(&sb); PFS_RETURN (error); } /* * Reclaim a vnode */ static int pfs_reclaim(struct vop_reclaim_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); return (pfs_vncache_free(va->a_vp)); } /* * Set attributes */ static int pfs_setattr(struct vop_setattr_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); PFS_RETURN (EOPNOTSUPP); } /* * Write to a file */ static int pfs_write(struct vop_write_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; struct sbuf sb; int error; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VREG) PFS_RETURN (EINVAL); KASSERT(pn->pn_type != pfstype_file, ("%s(): VREG vnode refers to non-file pfs_node", __func__)); if (!(pn->pn_flags & PFS_WR)) PFS_RETURN (EBADF); if (pn->pn_fill == NULL) PFS_RETURN (EIO); /* * This is necessary because either process' privileges may * have changed since the open() call. */ if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); PROC_UNLOCK(proc); } if (pn->pn_flags & PFS_RAWWR) { pfs_lock(pn); error = pn_fill(curthread, proc, pn, NULL, uio); pfs_unlock(pn); if (proc != NULL) PRELE(proc); PFS_RETURN (error); } sbuf_uionew(&sb, uio, &error); if (error) { if (proc != NULL) PRELE(proc); PFS_RETURN (error); } error = pn_fill(curthread, proc, pn, &sb, uio); sbuf_delete(&sb); if (proc != NULL) PRELE(proc); PFS_RETURN (error); } /* * Vnode operations */ struct vop_vector pfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = pfs_access, .vop_cachedlookup = pfs_lookup, .vop_close = pfs_close, .vop_create = VOP_EOPNOTSUPP, .vop_getattr = pfs_getattr, .vop_getextattr = pfs_getextattr, .vop_ioctl = pfs_ioctl, .vop_link = VOP_EOPNOTSUPP, .vop_lookup = vfs_cache_lookup, .vop_mkdir = VOP_EOPNOTSUPP, .vop_mknod = VOP_EOPNOTSUPP, .vop_open = pfs_open, .vop_read = pfs_read, .vop_readdir = pfs_readdir, .vop_readlink = pfs_readlink, .vop_reclaim = pfs_reclaim, .vop_remove = VOP_EOPNOTSUPP, .vop_rename = VOP_EOPNOTSUPP, .vop_rmdir = VOP_EOPNOTSUPP, .vop_setattr = pfs_setattr, .vop_symlink = VOP_EOPNOTSUPP, .vop_write = pfs_write, /* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */ }; Index: head/sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- head/sys/fs/tmpfs/tmpfs_vnops.c (revision 183214) +++ head/sys/fs/tmpfs/tmpfs_vnops.c (revision 183215) @@ -1,1472 +1,1470 @@ /* $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $ */ /*- * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * tmpfs vnode interface. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* --------------------------------------------------------------------- */ static int tmpfs_lookup(struct vop_cachedlookup_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct thread *td = cnp->cn_thread; int error; struct tmpfs_dirent *de; struct tmpfs_node *dnode; dnode = VP_TO_TMPFS_DIR(dvp); *vpp = NULLVP; /* Check accessibility of requested node as a first step. */ error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td); if (error != 0) goto out; /* We cannot be requesting the parent directory of the root node. */ MPASS(IMPLIES(dnode->tn_type == VDIR && dnode->tn_dir.tn_parent == dnode, !(cnp->cn_flags & ISDOTDOT))); if (cnp->cn_flags & ISDOTDOT) { int ltype = 0; ltype = VOP_ISLOCKED(dvp); vhold(dvp); VOP_UNLOCK(dvp, 0); /* Allocate a new vnode on the matching entry. */ error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent, cnp->cn_lkflags, vpp, td); vn_lock(dvp, ltype | LK_RETRY); vdrop(dvp); } else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') { VREF(dvp); *vpp = dvp; error = 0; } else { de = tmpfs_dir_lookup(dnode, cnp); if (de == NULL) { /* The entry was not found in the directory. * This is OK if we are creating or renaming an * entry and are working on the last component of * the path name. */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == CREATE || \ cnp->cn_nameiop == RENAME)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, cnp->cn_thread); if (error != 0) goto out; /* Keep the component name in the buffer for * future uses. */ cnp->cn_flags |= SAVENAME; error = EJUSTRETURN; } else error = ENOENT; } else { struct tmpfs_node *tnode; /* The entry was found, so get its associated * tmpfs_node. */ tnode = de->td_node; /* If we are not at the last path component and * found a non-directory or non-link entry (which * may itself be pointing to a directory), raise * an error. */ if ((tnode->tn_type != VDIR && tnode->tn_type != VLNK) && !(cnp->cn_flags & ISLASTCN)) { error = ENOTDIR; goto out; } /* If we are deleting or renaming the entry, keep * track of its tmpfs_dirent so that it can be * easily deleted later. */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, cnp->cn_thread); if (error != 0) goto out; /* Allocate a new vnode on the matching entry. */ error = tmpfs_alloc_vp(dvp->v_mount, tnode, cnp->cn_lkflags, vpp, td); if (error != 0) goto out; if ((dnode->tn_mode & S_ISTXT) && VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) { error = EPERM; vput(*vpp); *vpp = NULL; goto out; } cnp->cn_flags |= SAVENAME; } else { error = tmpfs_alloc_vp(dvp->v_mount, tnode, cnp->cn_lkflags, vpp, td); } } } /* Store the result of this lookup in the cache. Avoid this if the * request was for creation, as it does not improve timings on * emprical tests. */ if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) cache_enter(dvp, *vpp, cnp); out: /* If there were no errors, *vpp cannot be null and it must be * locked. */ MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp))); return error; } /* --------------------------------------------------------------------- */ static int tmpfs_create(struct vop_create_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; MPASS(vap->va_type == VREG || vap->va_type == VSOCK); return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); } /* --------------------------------------------------------------------- */ static int tmpfs_mknod(struct vop_mknod_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; if (vap->va_type != VBLK && vap->va_type != VCHR && vap->va_type != VFIFO) return EINVAL; return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); } /* --------------------------------------------------------------------- */ static int tmpfs_open(struct vop_open_args *v) { struct vnode *vp = v->a_vp; int mode = v->a_mode; int error; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(vp)); node = VP_TO_TMPFS_NODE(vp); /* The file is still active but all its names have been removed * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as * it is about to die. */ if (node->tn_links < 1) return (ENOENT); /* If the file is marked append-only, deny write requests. */ if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE) error = EPERM; else { error = 0; vnode_create_vobject(vp, node->tn_size, v->a_td); } MPASS(VOP_ISLOCKED(vp)); return error; } /* --------------------------------------------------------------------- */ static int tmpfs_close(struct vop_close_args *v) { struct vnode *vp = v->a_vp; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(vp)); node = VP_TO_TMPFS_NODE(vp); if (node->tn_links > 0) { /* Update node times. No need to do it if the node has * been deleted, because it will vanish after we return. */ tmpfs_update(vp); } return 0; } /* --------------------------------------------------------------------- */ int tmpfs_access(struct vop_access_args *v) { struct vnode *vp = v->a_vp; int mode = v->a_mode; struct ucred *cred = v->a_cred; int error; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(vp)); node = VP_TO_TMPFS_NODE(vp); switch (vp->v_type) { case VDIR: /* FALLTHROUGH */ case VLNK: /* FALLTHROUGH */ case VREG: if (mode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) { error = EROFS; goto out; } break; case VBLK: /* FALLTHROUGH */ case VCHR: /* FALLTHROUGH */ case VSOCK: /* FALLTHROUGH */ case VFIFO: break; default: error = EINVAL; goto out; } if (mode & VWRITE && node->tn_flags & IMMUTABLE) { error = EPERM; goto out; } error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid, mode, cred, NULL); out: MPASS(VOP_ISLOCKED(vp)); return error; } /* --------------------------------------------------------------------- */ int tmpfs_getattr(struct vop_getattr_args *v) { struct vnode *vp = v->a_vp; struct vattr *vap = v->a_vap; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); - VATTR_NULL(vap); - tmpfs_update(vp); vap->va_type = vp->v_type; vap->va_mode = node->tn_mode; vap->va_nlink = node->tn_links; vap->va_uid = node->tn_uid; vap->va_gid = node->tn_gid; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_fileid = node->tn_id; vap->va_size = node->tn_size; vap->va_blocksize = PAGE_SIZE; vap->va_atime = node->tn_atime; vap->va_mtime = node->tn_mtime; vap->va_ctime = node->tn_ctime; vap->va_birthtime = node->tn_birthtime; vap->va_gen = node->tn_gen; vap->va_flags = node->tn_flags; vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ? node->tn_rdev : NODEV; vap->va_bytes = round_page(node->tn_size); vap->va_filerev = 0; return 0; } /* --------------------------------------------------------------------- */ /* XXX Should this operation be atomic? I think it should, but code in * XXX other places (e.g., ufs) doesn't seem to be... */ int tmpfs_setattr(struct vop_setattr_args *v) { struct vnode *vp = v->a_vp; struct vattr *vap = v->a_vap; struct ucred *cred = v->a_cred; struct thread *td = curthread; int error; MPASS(VOP_ISLOCKED(vp)); error = 0; /* Abort if any unsettable attribute is given. */ if (vap->va_type != VNON || vap->va_nlink != VNOVAL || vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL || vap->va_blocksize != VNOVAL || vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL || vap->va_bytes != VNOVAL) error = EINVAL; if (error == 0 && (vap->va_flags != VNOVAL)) error = tmpfs_chflags(vp, vap->va_flags, cred, td); if (error == 0 && (vap->va_size != VNOVAL)) error = tmpfs_chsize(vp, vap->va_size, cred, td); if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL)) error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td); if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) error = tmpfs_chmod(vp, vap->va_mode, cred, td); if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL && vap->va_atime.tv_nsec != VNOVAL) || (vap->va_mtime.tv_sec != VNOVAL && vap->va_mtime.tv_nsec != VNOVAL) || (vap->va_birthtime.tv_sec != VNOVAL && vap->va_birthtime.tv_nsec != VNOVAL))) error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime, &vap->va_birthtime, vap->va_vaflags, cred, td); /* Update the node times. We give preference to the error codes * generated by this function rather than the ones that may arise * from tmpfs_update. */ tmpfs_update(vp); MPASS(VOP_ISLOCKED(vp)); return error; } /* --------------------------------------------------------------------- */ static int tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio) { vm_pindex_t idx; vm_page_t m; struct sf_buf *sf; off_t offset, addr; size_t tlen; caddr_t va; int error; addr = uio->uio_offset; idx = OFF_TO_IDX(addr); offset = addr & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); if ((vobj == NULL) || (vobj->resident_page_count == 0)) goto nocache; VM_OBJECT_LOCK(vobj); lookupvpg: if (((m = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(m, offset, tlen)) { if (vm_page_sleep_if_busy(m, FALSE, "tmfsmr")) goto lookupvpg; vm_page_busy(m); VM_OBJECT_UNLOCK(vobj); sched_pin(); sf = sf_buf_alloc(m, SFB_CPUPRIVATE); va = (caddr_t)sf_buf_kva(sf); error = uiomove(va + offset, tlen, uio); sf_buf_free(sf); sched_unpin(); VM_OBJECT_LOCK(vobj); vm_page_wakeup(m); VM_OBJECT_UNLOCK(vobj); return (error); } VM_OBJECT_UNLOCK(vobj); nocache: VM_OBJECT_LOCK(tobj); vm_object_pip_add(tobj, 1); m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (m->valid != VM_PAGE_BITS_ALL) { int behind, ahead; if (vm_pager_has_page(tobj, idx, &behind, &ahead)) { error = vm_pager_get_pages(tobj, &m, 1, 0); if (error != 0) { printf("tmpfs get pages from pager error [read]\n"); goto out; } } else vm_page_zero_invalid(m, TRUE); } VM_OBJECT_UNLOCK(tobj); sched_pin(); sf = sf_buf_alloc(m, SFB_CPUPRIVATE); va = (caddr_t)sf_buf_kva(sf); error = uiomove(va + offset, tlen, uio); sf_buf_free(sf); sched_unpin(); VM_OBJECT_LOCK(tobj); out: vm_page_lock_queues(); vm_page_unwire(m, 0); vm_page_activate(m); vm_page_unlock_queues(); vm_page_wakeup(m); vm_object_pip_subtract(tobj, 1); VM_OBJECT_UNLOCK(tobj); return (error); } static int tmpfs_read(struct vop_read_args *v) { struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; struct tmpfs_node *node; vm_object_t uobj; size_t len; int resid; int error = 0; node = VP_TO_TMPFS_NODE(vp); if (vp->v_type != VREG) { error = EISDIR; goto out; } if (uio->uio_offset < 0) { error = EINVAL; goto out; } node->tn_status |= TMPFS_NODE_ACCESSED; uobj = node->tn_reg.tn_aobj; while ((resid = uio->uio_resid) > 0) { error = 0; if (node->tn_size <= uio->uio_offset) break; len = MIN(node->tn_size - uio->uio_offset, resid); if (len == 0) break; error = tmpfs_mappedread(vp->v_object, uobj, len, uio); if ((error != 0) || (resid == uio->uio_resid)) break; } out: return error; } /* --------------------------------------------------------------------- */ static int tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio) { vm_pindex_t idx; vm_page_t vpg, tpg; struct sf_buf *sf; off_t offset, addr; size_t tlen; caddr_t va; int error; error = 0; addr = uio->uio_offset; idx = OFF_TO_IDX(addr); offset = addr & PAGE_MASK; tlen = MIN(PAGE_SIZE - offset, len); if ((vobj == NULL) || (vobj->resident_page_count == 0)) { vpg = NULL; goto nocache; } VM_OBJECT_LOCK(vobj); lookupvpg: if (((vpg = vm_page_lookup(vobj, idx)) != NULL) && vm_page_is_valid(vpg, offset, tlen)) { if (vm_page_sleep_if_busy(vpg, FALSE, "tmfsmw")) goto lookupvpg; vm_page_busy(vpg); vm_page_lock_queues(); vm_page_undirty(vpg); vm_page_unlock_queues(); VM_OBJECT_UNLOCK(vobj); sched_pin(); sf = sf_buf_alloc(vpg, SFB_CPUPRIVATE); va = (caddr_t)sf_buf_kva(sf); error = uiomove(va + offset, tlen, uio); sf_buf_free(sf); sched_unpin(); } else { VM_OBJECT_UNLOCK(vobj); vpg = NULL; } nocache: VM_OBJECT_LOCK(tobj); vm_object_pip_add(tobj, 1); tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); if (tpg->valid != VM_PAGE_BITS_ALL) { int behind, ahead; if (vm_pager_has_page(tobj, idx, &behind, &ahead)) { error = vm_pager_get_pages(tobj, &tpg, 1, 0); if (error != 0) { printf("tmpfs get pages from pager error [write]\n"); goto out; } } else vm_page_zero_invalid(tpg, TRUE); } VM_OBJECT_UNLOCK(tobj); if (vpg == NULL) { sched_pin(); sf = sf_buf_alloc(tpg, SFB_CPUPRIVATE); va = (caddr_t)sf_buf_kva(sf); error = uiomove(va + offset, tlen, uio); sf_buf_free(sf); sched_unpin(); } else { KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid")); pmap_copy_page(vpg, tpg); } VM_OBJECT_LOCK(tobj); out: if (vobj != NULL) VM_OBJECT_LOCK(vobj); vm_page_lock_queues(); if (error == 0) { vm_page_set_validclean(tpg, offset, tlen); vm_page_zero_invalid(tpg, TRUE); vm_page_dirty(tpg); } vm_page_unwire(tpg, 0); vm_page_activate(tpg); vm_page_unlock_queues(); vm_page_wakeup(tpg); if (vpg != NULL) vm_page_wakeup(vpg); if (vobj != NULL) VM_OBJECT_UNLOCK(vobj); vm_object_pip_subtract(tobj, 1); VM_OBJECT_UNLOCK(tobj); return (error); } static int tmpfs_write(struct vop_write_args *v) { struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; int ioflag = v->a_ioflag; struct thread *td = uio->uio_td; boolean_t extended; int error = 0; off_t oldsize; struct tmpfs_node *node; vm_object_t uobj; size_t len; int resid; node = VP_TO_TMPFS_NODE(vp); oldsize = node->tn_size; if (uio->uio_offset < 0 || vp->v_type != VREG) { error = EINVAL; goto out; } if (uio->uio_resid == 0) { error = 0; goto out; } if (ioflag & IO_APPEND) uio->uio_offset = node->tn_size; if (uio->uio_offset + uio->uio_resid > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) return (EFBIG); if (vp->v_type == VREG && td != NULL) { PROC_LOCK(td->td_proc); if (uio->uio_offset + uio->uio_resid > lim_cur(td->td_proc, RLIMIT_FSIZE)) { psignal(td->td_proc, SIGXFSZ); PROC_UNLOCK(td->td_proc); return (EFBIG); } PROC_UNLOCK(td->td_proc); } extended = uio->uio_offset + uio->uio_resid > node->tn_size; if (extended) { error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid); if (error != 0) goto out; } uobj = node->tn_reg.tn_aobj; while ((resid = uio->uio_resid) > 0) { if (node->tn_size <= uio->uio_offset) break; len = MIN(node->tn_size - uio->uio_offset, resid); if (len == 0) break; error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio); if ((error != 0) || (resid == uio->uio_resid)) break; } node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | (extended ? TMPFS_NODE_CHANGED : 0); if (node->tn_mode & (S_ISUID | S_ISGID)) { if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0)) node->tn_mode &= ~(S_ISUID | S_ISGID); } if (error != 0) (void)tmpfs_reg_resize(vp, oldsize); out: MPASS(IMPLIES(error == 0, uio->uio_resid == 0)); MPASS(IMPLIES(error != 0, oldsize == node->tn_size)); return error; } /* --------------------------------------------------------------------- */ static int tmpfs_fsync(struct vop_fsync_args *v) { struct vnode *vp = v->a_vp; MPASS(VOP_ISLOCKED(vp)); tmpfs_update(vp); return 0; } /* --------------------------------------------------------------------- */ static int tmpfs_remove(struct vop_remove_args *v) { struct vnode *dvp = v->a_dvp; struct vnode *vp = v->a_vp; int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(dvp)); MPASS(VOP_ISLOCKED(vp)); if (vp->v_type == VDIR) { error = EISDIR; goto out; } dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); de = tmpfs_dir_search(dnode, node); MPASS(de != NULL); /* Files marked as immutable or append-only cannot be deleted. */ if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) || (dnode->tn_flags & APPEND)) { error = EPERM; goto out; } /* Remove the entry from the directory; as it is a file, we do not * have to change the number of hard links of the directory. */ tmpfs_dir_detach(dvp, de); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. */ tmpfs_free_dirent(tmp, de, TRUE); if (node->tn_links > 0) node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; error = 0; out: return error; } /* --------------------------------------------------------------------- */ static int tmpfs_link(struct vop_link_args *v) { struct vnode *dvp = v->a_tdvp; struct vnode *vp = v->a_vp; struct componentname *cnp = v->a_cnp; int error; struct tmpfs_dirent *de; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(dvp)); MPASS(cnp->cn_flags & HASBUF); MPASS(dvp != vp); /* XXX When can this be false? */ node = VP_TO_TMPFS_NODE(vp); /* XXX: Why aren't the following two tests done by the caller? */ /* Hard links of directories are forbidden. */ if (vp->v_type == VDIR) { error = EPERM; goto out; } /* Cannot create cross-device links. */ if (dvp->v_mount != vp->v_mount) { error = EXDEV; goto out; } /* Ensure that we do not overflow the maximum number of links imposed * by the system. */ MPASS(node->tn_links <= LINK_MAX); if (node->tn_links == LINK_MAX) { error = EMLINK; goto out; } /* We cannot create links of files marked immutable or append-only. */ if (node->tn_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } /* Allocate a new directory entry to represent the node. */ error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node, cnp->cn_nameptr, cnp->cn_namelen, &de); if (error != 0) goto out; /* Insert the new directory entry into the appropriate directory. */ tmpfs_dir_attach(dvp, de); /* vp link count has changed, so update node times. */ node->tn_status |= TMPFS_NODE_CHANGED; tmpfs_update(vp); error = 0; out: return error; } /* --------------------------------------------------------------------- */ static int tmpfs_rename(struct vop_rename_args *v) { struct vnode *fdvp = v->a_fdvp; struct vnode *fvp = v->a_fvp; struct componentname *fcnp = v->a_fcnp; struct vnode *tdvp = v->a_tdvp; struct vnode *tvp = v->a_tvp; struct componentname *tcnp = v->a_tcnp; char *newname; int error; struct tmpfs_dirent *de; struct tmpfs_node *fdnode; struct tmpfs_node *fnode; struct tmpfs_node *tnode; struct tmpfs_node *tdnode; MPASS(VOP_ISLOCKED(tdvp)); MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp))); MPASS(fcnp->cn_flags & HASBUF); MPASS(tcnp->cn_flags & HASBUF); tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); /* Disallow cross-device renames. * XXX Why isn't this done by the caller? */ if (fvp->v_mount != tdvp->v_mount || (tvp != NULL && fvp->v_mount != tvp->v_mount)) { error = EXDEV; goto out; } tdnode = VP_TO_TMPFS_DIR(tdvp); /* If source and target are the same file, there is nothing to do. */ if (fvp == tvp) { error = 0; goto out; } /* If we need to move the directory between entries, lock the * source so that we can safely operate on it. */ if (tdvp != fdvp) { error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) goto out; } fdnode = VP_TO_TMPFS_DIR(fdvp); fnode = VP_TO_TMPFS_NODE(fvp); de = tmpfs_dir_search(fdnode, fnode); /* Avoid manipulating '.' and '..' entries. */ if (de == NULL) { MPASS(fvp->v_type == VDIR); error = EINVAL; goto out_locked; } MPASS(de->td_node == fnode); /* If re-naming a directory to another preexisting directory * ensure that the target directory is empty so that its * removal causes no side effects. * Kern_rename gurantees the destination to be a directory * if the source is one. */ if (tvp != NULL) { MPASS(tnode != NULL); if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (tdnode->tn_flags & (APPEND | IMMUTABLE))) { error = EPERM; goto out_locked; } if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) { if (tnode->tn_size > 0) { error = ENOTEMPTY; goto out_locked; } } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) { error = ENOTDIR; goto out_locked; } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) { error = EISDIR; goto out_locked; } else { MPASS(fnode->tn_type != VDIR && tnode->tn_type != VDIR); } } if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdnode->tn_flags & (APPEND | IMMUTABLE))) { error = EPERM; goto out_locked; } /* Ensure that we have enough memory to hold the new name, if it * has to be changed. */ if (fcnp->cn_namelen != tcnp->cn_namelen || memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) { newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK); } else newname = NULL; /* If the node is being moved to another directory, we have to do * the move. */ if (fdnode != tdnode) { /* In case we are moving a directory, we have to adjust its * parent to point to the new parent. */ if (de->td_node->tn_type == VDIR) { struct tmpfs_node *n; /* Ensure the target directory is not a child of the * directory being moved. Otherwise, we'd end up * with stale nodes. */ n = tdnode; while (n != n->tn_dir.tn_parent) { if (n == fnode) { error = EINVAL; if (newname != NULL) free(newname, M_TMPFSNAME); goto out_locked; } n = n->tn_dir.tn_parent; } /* Adjust the parent pointer. */ TMPFS_VALIDATE_DIR(fnode); de->td_node->tn_dir.tn_parent = tdnode; /* As a result of changing the target of the '..' * entry, the link count of the source and target * directories has to be adjusted. */ fdnode->tn_links--; tdnode->tn_links++; } /* Do the move: just remove the entry from the source directory * and insert it into the target one. */ tmpfs_dir_detach(fdvp, de); tmpfs_dir_attach(tdvp, de); } /* If the name has changed, we need to make it effective by changing * it in the directory entry. */ if (newname != NULL) { MPASS(tcnp->cn_namelen <= MAXNAMLEN); free(de->td_name, M_TMPFSNAME); de->td_namelen = (uint16_t)tcnp->cn_namelen; memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen); de->td_name = newname; fnode->tn_status |= TMPFS_NODE_CHANGED; tdnode->tn_status |= TMPFS_NODE_MODIFIED; } /* If we are overwriting an entry, we have to remove the old one * from the target directory. */ if (tvp != NULL) { /* Remove the old entry from the target directory. */ de = tmpfs_dir_search(tdnode, tnode); tmpfs_dir_detach(tdvp, de); /* Free the directory entry we just deleted. Note that the * node referred by it will not be removed until the vnode is * really reclaimed. */ tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE); } error = 0; out_locked: if (fdnode != tdnode) VOP_UNLOCK(fdvp, 0); out: /* Release target nodes. */ /* XXX: I don't understand when tdvp can be the same as tvp, but * other code takes care of this... */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp != NULL) vput(tvp); /* Release source nodes. */ vrele(fdvp); vrele(fvp); return error; } /* --------------------------------------------------------------------- */ static int tmpfs_mkdir(struct vop_mkdir_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; MPASS(vap->va_type == VDIR); return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL); } /* --------------------------------------------------------------------- */ static int tmpfs_rmdir(struct vop_rmdir_args *v) { struct vnode *dvp = v->a_dvp; struct vnode *vp = v->a_vp; int error; struct tmpfs_dirent *de; struct tmpfs_mount *tmp; struct tmpfs_node *dnode; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(dvp)); MPASS(VOP_ISLOCKED(vp)); tmp = VFS_TO_TMPFS(dvp->v_mount); dnode = VP_TO_TMPFS_DIR(dvp); node = VP_TO_TMPFS_DIR(vp); /* Directories with more than two entries ('.' and '..') cannot be * removed. */ if (node->tn_size > 0) { error = ENOTEMPTY; goto out; } if ((dnode->tn_flags & APPEND) || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* This invariant holds only if we are not trying to remove "..". * We checked for that above so this is safe now. */ MPASS(node->tn_dir.tn_parent == dnode); /* Get the directory entry associated with node (vp). This was * filled by tmpfs_lookup while looking up the entry. */ de = tmpfs_dir_search(dnode, node); MPASS(TMPFS_DIRENT_MATCHES(de, v->a_cnp->cn_nameptr, v->a_cnp->cn_namelen)); /* Check flags to see if we are allowed to remove the directory. */ if (dnode->tn_flags & APPEND || node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) { error = EPERM; goto out; } /* Detach the directory entry from the directory (dnode). */ tmpfs_dir_detach(dvp, de); node->tn_links--; node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \ TMPFS_NODE_MODIFIED; node->tn_dir.tn_parent->tn_links--; node->tn_dir.tn_parent->tn_status |= TMPFS_NODE_ACCESSED | \ TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; cache_purge(dvp); cache_purge(vp); /* Free the directory entry we just deleted. Note that the node * referred by it will not be removed until the vnode is really * reclaimed. */ tmpfs_free_dirent(tmp, de, TRUE); /* Release the deleted vnode (will destroy the node, notify * interested parties and clean it from the cache). */ dnode->tn_status |= TMPFS_NODE_CHANGED; tmpfs_update(dvp); error = 0; out: return error; } /* --------------------------------------------------------------------- */ static int tmpfs_symlink(struct vop_symlink_args *v) { struct vnode *dvp = v->a_dvp; struct vnode **vpp = v->a_vpp; struct componentname *cnp = v->a_cnp; struct vattr *vap = v->a_vap; char *target = v->a_target; #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */ MPASS(vap->va_type == VLNK); #else vap->va_type = VLNK; #endif return tmpfs_alloc_file(dvp, vpp, vap, cnp, target); } /* --------------------------------------------------------------------- */ static int tmpfs_readdir(struct vop_readdir_args *v) { struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; int *eofflag = v->a_eofflag; u_long **cookies = v->a_cookies; int *ncookies = v->a_ncookies; int error; off_t startoff; off_t cnt = 0; struct tmpfs_node *node; /* This operation only makes sense on directory nodes. */ if (vp->v_type != VDIR) return ENOTDIR; node = VP_TO_TMPFS_DIR(vp); startoff = uio->uio_offset; if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) { error = tmpfs_dir_getdotdent(node, uio); if (error != 0) goto outok; cnt++; } if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) { error = tmpfs_dir_getdotdotdent(node, uio); if (error != 0) goto outok; cnt++; } error = tmpfs_dir_getdents(node, uio, &cnt); outok: MPASS(error >= -1); if (error == -1) error = 0; if (eofflag != NULL) *eofflag = (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF); /* Update NFS-related variables. */ if (error == 0 && cookies != NULL && ncookies != NULL) { off_t i; off_t off = startoff; struct tmpfs_dirent *de = NULL; *ncookies = cnt; *cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK); for (i = 0; i < cnt; i++) { MPASS(off != TMPFS_DIRCOOKIE_EOF); if (off == TMPFS_DIRCOOKIE_DOT) { off = TMPFS_DIRCOOKIE_DOTDOT; } else { if (off == TMPFS_DIRCOOKIE_DOTDOT) { de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); } else if (de != NULL) { de = TAILQ_NEXT(de, td_entries); } else { de = tmpfs_dir_lookupbycookie(node, off); MPASS(de != NULL); de = TAILQ_NEXT(de, td_entries); } if (de == NULL) off = TMPFS_DIRCOOKIE_EOF; else off = tmpfs_dircookie(de); } (*cookies)[i] = off; } MPASS(uio->uio_offset == off); } return error; } /* --------------------------------------------------------------------- */ static int tmpfs_readlink(struct vop_readlink_args *v) { struct vnode *vp = v->a_vp; struct uio *uio = v->a_uio; int error; struct tmpfs_node *node; MPASS(uio->uio_offset == 0); MPASS(vp->v_type == VLNK); node = VP_TO_TMPFS_NODE(vp); error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid), uio); node->tn_status |= TMPFS_NODE_ACCESSED; return error; } /* --------------------------------------------------------------------- */ static int tmpfs_inactive(struct vop_inactive_args *v) { struct vnode *vp = v->a_vp; struct thread *l = v->a_td; struct tmpfs_node *node; MPASS(VOP_ISLOCKED(vp)); node = VP_TO_TMPFS_NODE(vp); if (node->tn_links == 0) vrecycle(vp, l); return 0; } /* --------------------------------------------------------------------- */ int tmpfs_reclaim(struct vop_reclaim_args *v) { struct vnode *vp = v->a_vp; struct tmpfs_mount *tmp; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); tmp = VFS_TO_TMPFS(vp->v_mount); vnode_destroy_vobject(vp); cache_purge(vp); tmpfs_free_vp(vp); /* If the node referenced by this vnode was deleted by the user, * we must free its associated data structures (now that the vnode * is being reclaimed). */ if (node->tn_links == 0) tmpfs_free_node(tmp, node); MPASS(vp->v_data == NULL); return 0; } /* --------------------------------------------------------------------- */ static int tmpfs_print(struct vop_print_args *v) { struct vnode *vp = v->a_vp; struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n", node, node->tn_flags, node->tn_links); printf("\tmode 0%o, owner %d, group %d, size %" PRIdMAX ", status 0x%x\n", node->tn_mode, node->tn_uid, node->tn_gid, (uintmax_t)node->tn_size, node->tn_status); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return 0; } /* --------------------------------------------------------------------- */ static int tmpfs_pathconf(struct vop_pathconf_args *v) { int name = v->a_name; register_t *retval = v->a_retval; int error; error = 0; switch (name) { case _PC_LINK_MAX: *retval = LINK_MAX; break; case _PC_NAME_MAX: *retval = NAME_MAX; break; case _PC_PATH_MAX: *retval = PATH_MAX; break; case _PC_PIPE_BUF: *retval = PIPE_BUF; break; case _PC_CHOWN_RESTRICTED: *retval = 1; break; case _PC_NO_TRUNC: *retval = 1; break; case _PC_SYNC_IO: *retval = 1; break; case _PC_FILESIZEBITS: *retval = 0; /* XXX Don't know which value should I return. */ break; default: error = EINVAL; } return error; } static int tmpfs_vptofh(struct vop_vptofh_args *ap) { struct tmpfs_fid *tfhp; struct tmpfs_node *node; tfhp = (struct tmpfs_fid *)ap->a_fhp; node = VP_TO_TMPFS_NODE(ap->a_vp); tfhp->tf_len = sizeof(struct tmpfs_fid); tfhp->tf_id = node->tn_id; tfhp->tf_gen = node->tn_gen; return (0); } /* --------------------------------------------------------------------- */ /* * vnode operations vector used for files stored in a tmpfs file system. */ struct vop_vector tmpfs_vnodeop_entries = { .vop_default = &default_vnodeops, .vop_lookup = vfs_cache_lookup, .vop_cachedlookup = tmpfs_lookup, .vop_create = tmpfs_create, .vop_mknod = tmpfs_mknod, .vop_open = tmpfs_open, .vop_close = tmpfs_close, .vop_access = tmpfs_access, .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, .vop_read = tmpfs_read, .vop_write = tmpfs_write, .vop_fsync = tmpfs_fsync, .vop_remove = tmpfs_remove, .vop_link = tmpfs_link, .vop_rename = tmpfs_rename, .vop_mkdir = tmpfs_mkdir, .vop_rmdir = tmpfs_rmdir, .vop_symlink = tmpfs_symlink, .vop_readdir = tmpfs_readdir, .vop_readlink = tmpfs_readlink, .vop_inactive = tmpfs_inactive, .vop_reclaim = tmpfs_reclaim, .vop_print = tmpfs_print, .vop_pathconf = tmpfs_pathconf, .vop_vptofh = tmpfs_vptofh, .vop_bmap = VOP_EOPNOTSUPP, }; Index: head/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c =================================================================== --- head/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c (revision 183214) +++ head/sys/gnu/fs/xfs/FreeBSD/xfs_vnops.c (revision 183215) @@ -1,1691 +1,1690 @@ /* * Copyright (c) 2001, Alexander Kabaev * Copyright (c) 2006, Russell Cattelan Digital Elves Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NO_VFS_MACROS #include "xfs.h" #include "xfs_types.h" #include "xfs_bit.h" #include "xfs_inum.h" #include "xfs_log.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_dir.h" #include "xfs_dir2.h" #include "xfs_dmapi.h" #include "xfs_mount.h" #include "xfs_alloc_btree.h" #include "xfs_bmap_btree.h" #include "xfs_ialloc_btree.h" #include "xfs_btree.h" #include "xfs_imap.h" #include "xfs_attr.h" #include "xfs_attr_sf.h" #include "xfs_dir_sf.h" #include "xfs_dir2_sf.h" #include "xfs_dinode.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_inode.h" #include "xfs_inode_item.h" #include "xfs_acl.h" #include "xfs_cap.h" #include "xfs_mac.h" #include "xfs_iomap.h" #include "xfs_clnt.h" #include "xfs_mountops.h" /* * Prototypes for XFS vnode operations. */ static vop_access_t _xfs_access; static vop_advlock_t _xfs_advlock; static vop_bmap_t _xfs_bmap; static vop_cachedlookup_t _xfs_cachedlookup; static vop_close_t _xfs_close; static vop_create_t _xfs_create; static vop_deleteextattr_t _xfs_deleteextattr; static vop_fsync_t _xfs_fsync; static vop_getattr_t _xfs_getattr; static vop_getextattr_t _xfs_getextattr; static vop_inactive_t _xfs_inactive; static vop_ioctl_t _xfs_ioctl; static vop_link_t _xfs_link; static vop_listextattr_t _xfs_listextattr; static vop_mkdir_t _xfs_mkdir; static vop_mknod_t _xfs_mknod; static vop_open_t _xfs_open; static vop_read_t _xfs_read; static vop_readdir_t _xfs_readdir; static vop_readlink_t _xfs_readlink; static vop_reclaim_t _xfs_reclaim; static vop_remove_t _xfs_remove; static vop_rename_t _xfs_rename; static vop_rmdir_t _xfs_rmdir; static vop_setattr_t _xfs_setattr; static vop_setextattr_t _xfs_setextattr; static vop_strategy_t _xfs_strategy; static vop_symlink_t _xfs_symlink; static vop_write_t _xfs_write; static vop_vptofh_t _xfs_vptofh; struct vop_vector xfs_vnops = { .vop_default = &default_vnodeops, .vop_access = _xfs_access, .vop_advlock = _xfs_advlock, .vop_bmap = _xfs_bmap, .vop_cachedlookup = _xfs_cachedlookup, .vop_close = _xfs_close, .vop_create = _xfs_create, .vop_deleteextattr = _xfs_deleteextattr, .vop_fsync = _xfs_fsync, .vop_getattr = _xfs_getattr, .vop_getextattr = _xfs_getextattr, .vop_inactive = _xfs_inactive, .vop_ioctl = _xfs_ioctl, .vop_link = _xfs_link, .vop_listextattr = _xfs_listextattr, .vop_lookup = vfs_cache_lookup, .vop_mkdir = _xfs_mkdir, .vop_mknod = _xfs_mknod, .vop_open = _xfs_open, .vop_read = _xfs_read, .vop_readdir = _xfs_readdir, .vop_readlink = _xfs_readlink, .vop_reclaim = _xfs_reclaim, .vop_remove = _xfs_remove, .vop_rename = _xfs_rename, .vop_rmdir = _xfs_rmdir, .vop_setattr = _xfs_setattr, .vop_setextattr = _xfs_setextattr, .vop_strategy = _xfs_strategy, .vop_symlink = _xfs_symlink, .vop_write = _xfs_write, .vop_vptofh = _xfs_vptofh, }; /* * FIFO's specific operations. */ static vop_close_t _xfsfifo_close; static vop_read_t _xfsfifo_read; static vop_kqfilter_t _xfsfifo_kqfilter; static vop_write_t _xfsfifo_write; struct vop_vector xfs_fifoops = { .vop_default = &fifo_specops, .vop_access = _xfs_access, .vop_close = _xfsfifo_close, .vop_fsync = _xfs_fsync, .vop_getattr = _xfs_getattr, .vop_inactive = _xfs_inactive, .vop_kqfilter = _xfsfifo_kqfilter, .vop_read = _xfsfifo_read, .vop_reclaim = _xfs_reclaim, .vop_setattr = _xfs_setattr, .vop_write = _xfsfifo_write, .vop_vptofh = _xfs_vptofh, }; static int _xfs_access( struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap) { int error; XVOP_ACCESS(VPTOXFSVP(ap->a_vp), ap->a_mode, ap->a_cred, error); return (error); } static int _xfs_open( struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; int a_fdidx; } */ *ap) { int error; XVOP_OPEN(VPTOXFSVP(ap->a_vp), ap->a_cred, error); if (error == 0) vnode_create_vobject(ap->a_vp, 0, ap->a_td); return (error); } static int _xfs_close( struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap) { int error = 0; /* XVOP_CLOSE(VPTOXFSVP(ap->a_vp), NULL, error); */ return (error); } static int _xfs_getattr( struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct mount *mp; xfs_vattr_t va; int error; /* extract the xfs vnode from the private data */ //xfs_vnode_t *xvp = (xfs_vnode_t *)vp->v_data; - VATTR_NULL(vap); memset(&va,0,sizeof(xfs_vattr_t)); va.va_mask = XFS_AT_STAT|XFS_AT_GENCOUNT|XFS_AT_XFLAGS; XVOP_GETATTR(VPTOXFSVP(vp), &va, 0, ap->a_cred, error); if (error) return (error); mp = vp->v_mount; vap->va_type = IFTOVT(((xfs_vnode_t *)vp->v_data)->v_inode->i_d.di_mode); vap->va_mode = va.va_mode; vap->va_nlink = va.va_nlink; vap->va_uid = va.va_uid; vap->va_gid = va.va_gid; vap->va_fsid = mp->mnt_stat.f_fsid.val[0]; vap->va_fileid = va.va_nodeid; vap->va_size = va.va_size; vap->va_blocksize = va.va_blocksize; vap->va_atime = va.va_atime; vap->va_mtime = va.va_mtime; vap->va_ctime = va.va_ctime; vap->va_gen = va.va_gen; vap->va_rdev = va.va_rdev; vap->va_bytes = (va.va_nblocks << BBSHIFT); /* XFS now supports devices that have block sizes * other than 512 so BBSHIFT will work for now * but need to get this value from the super block */ /* * Fields with no direct equivalent in XFS */ vap->va_filerev = 0; vap->va_flags = 0; return (0); } static int _xfs_setattr( struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; xfs_vattr_t va; int error; /* * Check for unsettable attributes. */ #ifdef RMC if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) return (EINVAL); #endif memset(&va, 0, sizeof(va)); if (vap->va_uid != (uid_t)VNOVAL) { va.va_mask |= XFS_AT_UID; va.va_uid = vap->va_uid; } if (vap->va_gid != (gid_t)VNOVAL) { va.va_mask |= XFS_AT_GID; va.va_gid = vap->va_gid; } if (vap->va_size != VNOVAL) { va.va_mask |= XFS_AT_SIZE; va.va_size = vap->va_size; } if (vap->va_atime.tv_sec != VNOVAL) { va.va_mask |= XFS_AT_ATIME; va.va_atime = vap->va_atime; } if (vap->va_mtime.tv_sec != VNOVAL) { va.va_mask |= XFS_AT_MTIME; va.va_mtime = vap->va_mtime; } if (vap->va_ctime.tv_sec != VNOVAL) { va.va_mask |= XFS_AT_CTIME; va.va_ctime = vap->va_ctime; } if (vap->va_mode != (mode_t)VNOVAL) { va.va_mask |= XFS_AT_MODE; va.va_mode = vap->va_mode; } XVOP_SETATTR(VPTOXFSVP(vp), &va, 0, ap->a_cred, error); return (error); } static int _xfs_inactive( struct vop_inactive_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap) { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; int error; XVOP_INACTIVE(VPTOXFSVP(vp), td->td_ucred, error); return (error); } static int _xfs_read( struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int error; switch (vp->v_type) { case VREG: break; case VDIR: return (EISDIR); default: return (EPERM); }; XVOP_READ(VPTOXFSVP(vp), uio, ap->a_ioflag, ap->a_cred, error); return error; } int xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag) { xfs_fileoff_t lbn, nextlbn; xfs_fsize_t bytesinfile; long size, xfersize, blkoffset; struct buf *bp; struct vnode *vp; int error, orig_resid; int seqcount; seqcount = ioflag >> IO_SEQSHIFT; orig_resid = uio->uio_resid; if (orig_resid <= 0) return (0); vp = XFS_ITOV(ip)->v_vnode; /* * Ok so we couldn't do it all in one vm trick... * so cycle around trying smaller bites.. */ for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_d.di_size - uio->uio_offset) <= 0) break; lbn = XFS_B_TO_FSBT(mp, uio->uio_offset); nextlbn = lbn + 1; /* * size of buffer. The buffer representing the * end of the file is rounded up to the size of * the block type ( fragment or full block, * depending ). */ size = mp->m_sb.sb_blocksize; blkoffset = XFS_B_FSB_OFFSET(mp, uio->uio_offset); /* * The amount we want to transfer in this iteration is * one FS block less the amount of the data before * our startpoint (duh!) */ xfersize = mp->m_sb.sb_blocksize - blkoffset; /* * But if we actually want less than the block, * or the file doesn't have a whole block more of data, * then use the lesser number. */ if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (XFS_FSB_TO_B(mp, nextlbn) >= ip->i_d.di_size ) { /* * Don't do readahead if this is the end of the file. */ error = bread(vp, lbn, size, NOCRED, &bp); } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { /* * Otherwise if we are allowed to cluster, * grab as much as we can. * * XXX This may not be a win if we are not * doing sequential access. */ error = cluster_read(vp, ip->i_d.di_size, lbn, size, NOCRED, uio->uio_resid, seqcount, &bp); } else if (seqcount > 1) { /* * If we are NOT allowed to cluster, then * if we appear to be acting sequentially, * fire off a request for a readahead * as well as a read. Note that the 4th and 5th * arguments point to arrays of the size specified in * the 6th argument. */ int nextsize = mp->m_sb.sb_blocksize; error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else { /* * Failing all of the above, just read what the * user asked for. Interestingly, the same as * the first option above. */ error = bread(vp, lbn, size, NOCRED, &bp); } if (error) { brelse(bp); bp = NULL; break; } /* * If IO_DIRECT then set B_DIRECT for the buffer. This * will cause us to attempt to release the buffer later on * and will cause the buffer cache to attempt to free the * underlying pages. */ if (ioflag & IO_DIRECT) bp->b_flags |= B_DIRECT; /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. */ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } /* * otherwise use the general form */ error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (error) break; if (ioflag & (IO_VMIO|IO_DIRECT) ) { /* * If there are no dependencies, and it's VMIO, * then we don't need the buf, mark it available * for freeing. The VM has the data. */ bp->b_flags |= B_RELBUF; brelse(bp); } else { /* * Otherwise let whoever * made the request take care of * freeing it. We just queue * it onto another list. */ bqrelse(bp); } } /* * This can only happen in the case of an error * because the loop above resets bp to NULL on each iteration * and on normal completion has not set a new value into it. * so it must have come from a 'break' statement */ if (bp != NULL) { if (ioflag & (IO_VMIO|IO_DIRECT)) { bp->b_flags |= B_RELBUF; brelse(bp); } else bqrelse(bp); } return (error); } static int _xfs_write(struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; int error; xfs_vnode_t *xvp = (xfs_vnode_t *)vp->v_data; error = xfs_write(xvp->v_bh.bh_first, uio, ioflag, ap->a_cred); if (error < 0) { printf("Xfs_write got error %d\n",error); return -error; } return 0; } int xfs_write_file(xfs_inode_t *xip, struct uio *uio, int ioflag) { struct buf *bp; //struct thread *td; daddr_t lbn; off_t osize = 0; off_t offset= 0; int blkoffset, error, resid, xfersize; int fsblocksize; int seqcount; xfs_iomap_t iomap; int maps = 1; xfs_vnode_t *xvp = XFS_ITOV(xip); struct vnode *vp = xvp->v_vnode; xfs_mount_t *mp = (&xip->i_iocore)->io_mount; seqcount = ioflag >> IO_SEQSHIFT; memset(&iomap,0,sizeof(xfs_iomap_t)); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ #if 0 td = uio->uio_td; if (vp->v_type == VREG && td != NULL) { PROC_LOCK(td->td_proc); if (uio->uio_offset + uio->uio_resid > lim_cur(td->td_proc, RLIMIT_FSIZE)) { psignal(td->td_proc, SIGXFSZ); PROC_UNLOCK(td->td_proc); return (EFBIG); } PROC_UNLOCK(td->td_proc); } #endif resid = uio->uio_resid; offset = uio->uio_offset; osize = xip->i_d.di_size; /* xfs bmap wants bytes for both offset and size */ XVOP_BMAP(xvp, uio->uio_offset, uio->uio_resid, BMAPI_WRITE|BMAPI_DIRECT, &iomap, &maps, error); if(error) { printf("XVOP_BMAP failed\n"); goto error; } for (error = 0; uio->uio_resid > 0;) { lbn = XFS_B_TO_FSBT(mp, offset); blkoffset = XFS_B_FSB_OFFSET(mp, offset); xfersize = mp->m_sb.sb_blocksize - blkoffset; fsblocksize = mp->m_sb.sb_blocksize; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; /* * getblk sets buf by blkno * bo->bo_bsize * bo_bsize is set from the mnt point fsize * so we call getblk in the case using fsblocks * not basic blocks */ bp = getblk(vp, lbn, fsblocksize, 0, 0, 0); if(!bp) { printf("getblk failed\n"); error = EINVAL; break; } if (!(bp->b_flags & B_CACHE) && fsblocksize > xfersize) vfs_bio_clrbuf(bp); if (offset + xfersize > xip->i_d.di_size) { xip->i_d.di_size = offset + xfersize; vnode_pager_setsize(vp, offset + fsblocksize); } /* move the offset for the next itteration of the loop */ offset += xfersize; error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if ((ioflag & IO_VMIO) && (LIST_FIRST(&bp->b_dep) == NULL)) /* in ext2fs? */ bp->b_flags |= B_RELBUF; /* force to full direct for now */ bp->b_flags |= B_DIRECT; /* and sync ... the delay path is not pushing data out */ ioflag |= IO_SYNC; if (ioflag & IO_SYNC) { (void)bwrite(bp); } else if (0 /* RMC xfersize + blkoffset == fs->s_frag_size */) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(vp, bp, osize, seqcount); } else { bawrite(bp); } } else { bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } if (error || xfersize == 0) break; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ #if 0 if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_mode &= ~(ISUID | ISGID); #endif if (error) { if (ioflag & IO_UNIT) { #if 0 (void)ext2_truncate(vp, osize, ioflag & IO_SYNC, ap->a_cred, uio->uio_td); #endif uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { /* Update the vnode here? */ } error: return error; } static int _xfs_create( struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct thread *td = curthread; struct ucred *credp = td->td_ucred; struct componentname *cnp = ap->a_cnp; xfs_vnode_t *xvp; xfs_vattr_t va; int error; memset(&va, 0, sizeof (va)); va.va_mask |= XFS_AT_MODE; va.va_mode = vap->va_mode; va.va_mask |= XFS_AT_TYPE; va.va_mode |= VTTOIF(vap->va_type); xvp = NULL; XVOP_CREATE(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error); if (error == 0) { *ap->a_vpp = xvp->v_vnode; VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE); } return (error); } extern int xfs_remove(bhv_desc_t *, bhv_desc_t *, vname_t *, cred_t *); static int _xfs_remove( struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap) { struct vnode *vp = ap->a_vp; struct thread *td = curthread; struct ucred *credp = td->td_ucred; /* struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; */ int error; if (vp->v_type == VDIR || vp->v_usecount != 1) return (EPERM); error = xfs_remove(VPTOXFSVP(ap->a_dvp)->v_bh.bh_first, VPTOXFSVP(ap->a_vp)->v_bh.bh_first, ap->a_cnp,credp); cache_purge(vp); return error; } static int _xfs_rename( struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; /* struct componentname *tcnp = ap->a_tcnp; */ /* struct componentname *fcnp = ap->a_fcnp;*/ int error = EPERM; if (error) goto out; /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (tvp && tvp->v_usecount > 1) { error = EBUSY; goto out; } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); vgone(fvp); if (tvp) vgone(tvp); return (error); } static int _xfs_link( struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap) { xfs_vnode_t *tdvp, *vp; int error; tdvp = VPTOXFSVP(ap->a_tdvp); vp = VPTOXFSVP(ap->a_vp); XVOP_LINK(tdvp, vp, ap->a_cnp, NULL, error); return (error); } static int _xfs_symlink( struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap) { struct thread *td = curthread; struct ucred *credp = td->td_ucred; xfs_vnode_t *xvp; xfs_vattr_t va; int error; memset(&va, 0, sizeof (va)); va.va_mask |= XFS_AT_MODE; va.va_mode = ap->a_vap->va_mode | S_IFLNK; va.va_mask |= XFS_AT_TYPE; XVOP_SYMLINK(VPTOXFSVP(ap->a_dvp), ap->a_cnp, &va, ap->a_target, &xvp, credp, error); if (error == 0) { *ap->a_vpp = xvp->v_vnode; VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE); } return (error); } static int _xfs_mknod( struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct thread *td = curthread; struct ucred *credp = td->td_ucred; struct componentname *cnp = ap->a_cnp; xfs_vnode_t *xvp; xfs_vattr_t va; int error; memset(&va, 0, sizeof (va)); va.va_mask |= XFS_AT_MODE; va.va_mode = vap->va_mode | S_IFIFO; va.va_mask |= XFS_AT_TYPE; va.va_mask |= XFS_AT_RDEV; va.va_rdev = vap->va_rdev; xvp = NULL; XVOP_CREATE(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error); if (error == 0) { *ap->a_vpp = xvp->v_vnode; VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE); } return (error); } static int _xfs_mkdir( struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct thread *td = curthread; struct ucred *credp = td->td_ucred; struct componentname *cnp = ap->a_cnp; xfs_vnode_t *xvp; xfs_vattr_t va; int error; memset(&va, 0, sizeof (va)); va.va_mask |= XFS_AT_MODE; va.va_mode = vap->va_mode | S_IFDIR; va.va_mask |= XFS_AT_TYPE; xvp = NULL; XVOP_MKDIR(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error); if (error == 0) { *ap->a_vpp = xvp->v_vnode; VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE); } return (error); } static int _xfs_rmdir( struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; /* struct componentname *cnp = ap->a_cnp; */ int error; if (dvp == vp) return (EINVAL); error = EPERM; return (error); } static int _xfs_readdir( struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int error; off_t off; int eof = 0; if (vp->v_type != VDIR) return (EPERM); if (ap->a_ncookies) { return (EOPNOTSUPP); } error = 0; while (!eof){ off = (int)uio->uio_offset; XVOP_READDIR(VPTOXFSVP(vp), uio, NULL, &eof, error); if ((uio->uio_offset == off) || error) { break; } } if (ap->a_eofflag) *ap->a_eofflag = (eof != 0); return (error); } static int _xfs_readlink( struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; int error; XVOP_READLINK(VPTOXFSVP(vp), uio, 0, cred, error); return (error); } static int _xfs_fsync( struct vop_fsync_args /* { struct vnode * a_vp; int a_waitfor; struct thread * a_td; } */ *ap) { xfs_vnode_t *vp = VPTOXFSVP(ap->a_vp); int flags = FSYNC_DATA; int error; if (ap->a_waitfor == MNT_WAIT) flags |= FSYNC_WAIT; XVOP_FSYNC(vp, flags, ap->a_td->td_ucred, (xfs_off_t)0, (xfs_off_t)-1, error); return (error); } static int _xfs_bmap( struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct bufobj **a_bop; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap) { xfs_iomap_t iomap; xfs_off_t offset; ssize_t size; struct mount *mp; struct xfs_mount *xmp; struct xfs_vnode *xvp; int error, maxrun, retbm; mp = ap->a_vp->v_mount; xmp = XFS_VFSTOM(MNTTOVFS(mp)); if (ap->a_bop != NULL) *ap->a_bop = &xmp->m_ddev_targp->specvp->v_bufobj; if (ap->a_bnp == NULL) return (0); xvp = VPTOXFSVP(ap->a_vp); retbm = 1; offset = XFS_FSB_TO_B(xmp, ap->a_bn); size = XFS_FSB_TO_B(xmp, 1); XVOP_BMAP(xvp, offset, size, BMAPI_READ, &iomap, &retbm, error); if (error) return (error); if (retbm == 0 || iomap.iomap_bn == IOMAP_DADDR_NULL) { *ap->a_bnp = (daddr_t)-1; if (ap->a_runb) *ap->a_runb = 0; if (ap->a_runp) *ap->a_runp = 0; } else { *ap->a_bnp = iomap.iomap_bn + btodb(iomap.iomap_delta); maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; if (ap->a_runb) { *ap->a_runb = XFS_B_TO_FSB(xmp, iomap.iomap_delta); if (*ap->a_runb > maxrun) *ap->a_runb = maxrun; } if (ap->a_runp) { *ap->a_runp = XFS_B_TO_FSB(xmp, iomap.iomap_bsize - iomap.iomap_delta - size); if (*ap->a_runp > maxrun) *ap->a_runp = maxrun; } } return (0); } static int _xfs_strategy( struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap) { daddr_t blkno; struct buf *bp;; struct bufobj *bo; struct vnode *vp; struct xfs_mount *xmp; int error; bp = ap->a_bp; vp = ap->a_vp; KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)", __func__, ap->a_vp, ap->a_bp->b_vp)); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &blkno, NULL, NULL); bp->b_blkno = blkno; bp->b_iooffset = (blkno << BBSHIFT); if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } xmp = XFS_VFSTOM(MNTTOVFS(vp->v_mount)); bo = &xmp->m_ddev_targp->specvp->v_bufobj; bo->bo_ops->bop_strategy(bo, bp); return (0); } int _xfs_ioctl( struct vop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int fflag; struct ucred *cred; struct thread *a_td; } */ *ap) { /* struct vnode *vp = ap->a_vp; */ /* struct thread *p = ap->a_td; */ /* struct file *fp; */ int error; xfs_vnode_t *xvp = VPTOXFSVP(ap->a_vp); printf("_xfs_ioctl cmd 0x%lx data %p\n",ap->a_command,ap->a_data); // XVOP_IOCTL(xvp,(void *)NULL,(void *)NULL,ap->a_fflag,ap->a_command,ap->a_data,error); error = xfs_ioctl(xvp->v_bh.bh_first,NULL,NULL,ap->a_fflag,ap->a_command,ap->a_data); return error; } int _xfs_advlock( struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap) { /* struct vnode *vp = ap->a_vp;*/ struct flock *fl = ap->a_fl; /* caddr_t id = (caddr_t)1 */ /* ap->a_id */; /* int flags = ap->a_flags; */ off_t start, end, size; int error/* , lkop */; /*KAN: temp */ return (EOPNOTSUPP); size = 0; error = 0; switch (fl->l_whence) { case SEEK_SET: case SEEK_CUR: start = fl->l_start; break; case SEEK_END: start = fl->l_start + size; default: return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len == 0) end = -1; else { end = start + fl->l_len - 1; if (end < start) return (EINVAL); } #ifdef notyet switch (ap->a_op) { case F_SETLK: error = lf_advlock(ap, &vp->v_lockf, size); break; case F_UNLCK: lf_advlock(ap, &vp->v_lockf, size); break; case F_GETLK: error = lf_advlock(ap, &vp->v_lockf, size); break; default: return (EINVAL); } #endif return (error); } static int _xfs_cachedlookup( struct vop_cachedlookup_args /* { struct vnode * a_dvp; struct vnode ** a_vpp; struct componentname * a_cnp; } */ *ap) { struct vnode *dvp, *tvp; struct xfs_vnode *cvp; int islastcn; int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; struct thread *td = cnp->cn_thread; char *pname = cnp->cn_nameptr; int namelen = cnp->cn_namelen; *vpp = NULL; dvp = ap->a_dvp; islastcn = flags & ISLASTCN; XVOP_LOOKUP(VPTOXFSVP(dvp), cnp, &cvp, 0, NULL, cred, error); if (error == ENOENT) { if ((nameiop == CREATE || nameiop == RENAME || nameiop == DELETE) && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cred, td); if (error) return (error); cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(dvp, *vpp, cnp); return (error); } if (error) return (error); tvp = cvp->v_vnode; if (nameiop == DELETE && islastcn) { if ((error = vn_lock(tvp, LK_EXCLUSIVE))) { vrele(tvp); goto err_out; } *vpp = tvp; /* Directory should be writable for deletes. */ error = VOP_ACCESS(dvp, VWRITE, cred, td); if (error) goto err_out; /* XXXKAN: Permission checks for sticky dirs? */ return (0); } if (nameiop == RENAME && islastcn) { if ((error = vn_lock(tvp, LK_EXCLUSIVE))) { vrele(tvp); goto err_out; } *vpp = tvp; if ((error = VOP_ACCESS(dvp, VWRITE, cred, td))) goto err_out; return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0); error = vn_lock(tvp, cnp->cn_lkflags); if (error) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); vrele(tvp); goto err_out; } *vpp = tvp; } else if (namelen == 1 && pname[0] == '.') { *vpp = tvp; KASSERT(tvp == dvp, ("not same directory")); } else { if ((error = vn_lock(tvp, cnp->cn_lkflags))) { vrele(tvp); goto err_out; } *vpp = tvp; } if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, *vpp, cnp); return (0); err_out: if (*vpp != 0) vput(*vpp); return (error); } static int _xfs_reclaim( struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap) { struct vnode *vp = ap->a_vp; struct xfs_vnode *xfs_vp = VPTOXFSVP(vp); int error; XVOP_RECLAIM(xfs_vp, error); kmem_free(xfs_vp, sizeof(*xfs_vp)); vp->v_data = NULL; return (error); } static int _xfs_kqfilter( struct vop_kqfilter_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct knote *a_kn; } */ *ap) { return (0); } struct xfs_inode * xfs_vtoi(struct xfs_vnode *xvp) { return(XFS_BHVTOI(xvp->v_fbhv)); } /* * Read wrapper for fifos. */ static int _xfsfifo_read( struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap) { int error, resid; struct xfs_inode *ip; struct uio *uio; uio = ap->a_uio; resid = uio->uio_resid; error = fifo_specops.vop_read(ap); ip = xfs_vtoi(VPTOXFSVP(ap->a_vp)); if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) xfs_ichgtime(ip, XFS_ICHGTIME_ACC); return (error); } /* * Write wrapper for fifos. */ static int _xfsfifo_write( struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap) { int error, resid; struct uio *uio; struct xfs_inode *ip; uio = ap->a_uio; resid = uio->uio_resid; error = fifo_specops.vop_write(ap); ip = xfs_vtoi(VPTOXFSVP(ap->a_vp)); if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); return (error); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int _xfsfifo_close( struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap) { return (fifo_specops.vop_close(ap)); } /* * Kqfilter wrapper for fifos. * * Fall through to ufs kqfilter routines if needed */ static int _xfsfifo_kqfilter( struct vop_kqfilter_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct knote *a_kn; } */ *ap) { int error; error = fifo_specops.vop_kqfilter(ap); if (error) error = _xfs_kqfilter(ap); return (error); } static int _xfs_getextattr( struct vop_getextattr_args /* { struct vnode *a_vp; int a_attrnamespace; const char *a_name; struct uio *a_uio; size_t *a_size; struct ucred *a_cred; struct thread *a_td; } */ *ap) { int error; char *value; int size; error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); if (error) return (error); size = ATTR_MAX_VALUELEN; value = (char *)kmem_zalloc(size, KM_SLEEP); if (value == NULL) return (ENOMEM); XVOP_ATTR_GET(VPTOXFSVP(ap->a_vp), ap->a_name, value, &size, 1, ap->a_cred, error); if (ap->a_uio != NULL) { if (ap->a_uio->uio_iov->iov_len < size) error = ERANGE; else uiomove(value, size, ap->a_uio); } if (ap->a_size != NULL) *ap->a_size = size; kmem_free(value, ATTR_MAX_VALUELEN); return (error); } static int _xfs_listextattr( struct vop_listextattr_args /* { struct vnode *a_vp; int a_attrnamespace; struct uio *a_uio; size_t *a_size; struct ucred *a_cred; struct thread *a_td; } */ *ap) { int error; char *buf = NULL; int buf_len = 0; attrlist_cursor_kern_t cursor = { 0 }; int i; char name_len; int attrnames_len = 0; int xfs_flags = ATTR_KERNAMELS; error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VREAD); if (error) return (error); if (ap->a_attrnamespace & EXTATTR_NAMESPACE_USER) xfs_flags |= ATTR_KERNORMALS; if (ap->a_attrnamespace & EXTATTR_NAMESPACE_SYSTEM) xfs_flags |= ATTR_KERNROOTLS; if (ap->a_uio == NULL || ap->a_uio->uio_iov[0].iov_base == NULL) { xfs_flags |= ATTR_KERNOVAL; buf_len = 0; } else { buf = ap->a_uio->uio_iov[0].iov_base; buf_len = ap->a_uio->uio_iov[0].iov_len; } XVOP_ATTR_LIST(VPTOXFSVP(ap->a_vp), buf, buf_len, xfs_flags, &cursor, ap->a_cred, error); if (error < 0) { attrnames_len = -error; error = 0; } if (buf == NULL) goto done; /* * extattr_list expects a list of names. Each list * entry consists of one byte for the name length, followed * by the name (not null terminated) */ name_len=0; for(i=attrnames_len-1; i > 0 ; --i) { buf[i] = buf[i-1]; if (buf[i]) ++name_len; else { buf[i] = name_len; name_len = 0; } } buf[0] = name_len; if (ap->a_uio != NULL) ap->a_uio->uio_resid -= attrnames_len; done: if (ap->a_size != NULL) *ap->a_size = attrnames_len; return (error); } static int _xfs_setextattr(struct vop_setextattr_args *ap) /* vop_setextattr { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; INOUT struct uio *a_uio; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { char *val; size_t vallen; int error, xfs_flags; if (ap->a_vp->v_type == VCHR) return (EOPNOTSUPP); if (ap->a_uio == NULL) return (EINVAL); vallen = ap->a_uio->uio_resid; if (vallen > ATTR_MAX_VALUELEN) return (EOVERFLOW); if (ap->a_name[0] == '\0') return (EINVAL); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); if (error) return (error); xfs_flags = 0; if (ap->a_attrnamespace & EXTATTR_NAMESPACE_USER) xfs_flags |= ATTR_KERNORMALS; if (ap->a_attrnamespace & EXTATTR_NAMESPACE_SYSTEM) xfs_flags |= ATTR_KERNROOTLS; val = (char *)kmem_zalloc(vallen, KM_SLEEP); if (val == NULL) return (ENOMEM); error = uiomove(val, (int)vallen, ap->a_uio); if (error) goto err_out; XVOP_ATTR_SET(VPTOXFSVP(ap->a_vp), ap->a_name, val, vallen, xfs_flags, ap->a_cred, error); err_out: kmem_free(val, vallen); return(error); } static int _xfs_deleteextattr(struct vop_deleteextattr_args *ap) /* vop_deleteextattr { IN struct vnode *a_vp; IN int a_attrnamespace; IN const char *a_name; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { int error, xfs_flags; if (ap->a_vp->v_type == VCHR) return (EOPNOTSUPP); if (ap->a_name[0] == '\0') return (EINVAL); error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, VWRITE); if (error) return (error); xfs_flags = 0; if (ap->a_attrnamespace & EXTATTR_NAMESPACE_USER) xfs_flags |= ATTR_KERNORMALS; if (ap->a_attrnamespace & EXTATTR_NAMESPACE_SYSTEM) xfs_flags |= ATTR_KERNROOTLS; XVOP_ATTR_REMOVE(VPTOXFSVP(ap->a_vp), ap->a_name, xfs_flags, ap->a_cred, error); return (error); } static int _xfs_vptofh(struct vop_vptofh_args *ap) /* vop_vptofh { IN struct vnode *a_vp; IN struct fid *a_fhp; }; */ { printf("xfs_vptofh"); return ENOSYS; } Index: head/sys/kern/uipc_mqueue.c =================================================================== --- head/sys/kern/uipc_mqueue.c (revision 183214) +++ head/sys/kern/uipc_mqueue.c (revision 183215) @@ -1,2515 +1,2514 @@ /*- * Copyright (c) 2005 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * POSIX message queue implementation. * * 1) A mqueue filesystem can be mounted, each message queue appears * in mounted directory, user can change queue's permission and * ownership, or remove a queue. Manually creating a file in the * directory causes a message queue to be created in the kernel with * default message queue attributes applied and same name used, this * method is not advocated since mq_open syscall allows user to specify * different attributes. Also the file system can be mounted multiple * times at different mount points but shows same contents. * * 2) Standard POSIX message queue API. The syscalls do not use vfs layer, * but directly operate on internal data structure, this allows user to * use the IPC facility without having to mount mqueue file system. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Limits and constants */ #define MQFS_NAMELEN NAME_MAX #define MQFS_DELEN (8 + MQFS_NAMELEN) /* node types */ typedef enum { mqfstype_none = 0, mqfstype_root, mqfstype_dir, mqfstype_this, mqfstype_parent, mqfstype_file, mqfstype_symlink, } mqfs_type_t; struct mqfs_node; /* * mqfs_info: describes a mqfs instance */ struct mqfs_info { struct sx mi_lock; struct mqfs_node *mi_root; struct unrhdr *mi_unrhdr; }; struct mqfs_vdata { LIST_ENTRY(mqfs_vdata) mv_link; struct mqfs_node *mv_node; struct vnode *mv_vnode; struct task mv_task; }; /* * mqfs_node: describes a node (file or directory) within a mqfs */ struct mqfs_node { char mn_name[MQFS_NAMELEN+1]; struct mqfs_info *mn_info; struct mqfs_node *mn_parent; LIST_HEAD(,mqfs_node) mn_children; LIST_ENTRY(mqfs_node) mn_sibling; LIST_HEAD(,mqfs_vdata) mn_vnodes; int mn_refcount; mqfs_type_t mn_type; int mn_deleted; u_int32_t mn_fileno; void *mn_data; struct timespec mn_birth; struct timespec mn_ctime; struct timespec mn_atime; struct timespec mn_mtime; uid_t mn_uid; gid_t mn_gid; int mn_mode; }; #define VTON(vp) (((struct mqfs_vdata *)((vp)->v_data))->mv_node) #define VTOMQ(vp) ((struct mqueue *)(VTON(vp)->mn_data)) #define VFSTOMQFS(m) ((struct mqfs_info *)((m)->mnt_data)) #define FPTOMQ(fp) ((struct mqueue *)(((struct mqfs_node *) \ (fp)->f_data)->mn_data)) TAILQ_HEAD(msgq, mqueue_msg); struct mqueue; struct mqueue_notifier { LIST_ENTRY(mqueue_notifier) nt_link; struct sigevent nt_sigev; ksiginfo_t nt_ksi; struct proc *nt_proc; }; struct mqueue { struct mtx mq_mutex; int mq_flags; long mq_maxmsg; long mq_msgsize; long mq_curmsgs; long mq_totalbytes; struct msgq mq_msgq; int mq_receivers; int mq_senders; struct selinfo mq_rsel; struct selinfo mq_wsel; struct mqueue_notifier *mq_notifier; }; #define MQ_RSEL 0x01 #define MQ_WSEL 0x02 struct mqueue_msg { TAILQ_ENTRY(mqueue_msg) msg_link; unsigned int msg_prio; unsigned int msg_size; /* following real data... */ }; SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0, "POSIX real time message queue"); static int default_maxmsg = 10; static int default_msgsize = 1024; static int maxmsg = 100; SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW, &maxmsg, 0, "Default maximum messages in queue"); static int maxmsgsize = 16384; SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW, &maxmsgsize, 0, "Default maximum message size"); static int maxmq = 100; SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW, &maxmq, 0, "maximum message queues"); static int curmq = 0; SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW, &curmq, 0, "current message queue number"); static int unloadable = 0; static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data"); static eventhandler_tag exit_tag; /* Only one instance per-system */ static struct mqfs_info mqfs_data; static uma_zone_t mqnode_zone; static uma_zone_t mqueue_zone; static uma_zone_t mvdata_zone; static uma_zone_t mqnoti_zone; static struct vop_vector mqfs_vnodeops; static struct fileops mqueueops; /* * Directory structure construction and manipulation */ #ifdef notyet static struct mqfs_node *mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen, struct ucred *cred, int mode); static struct mqfs_node *mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen, struct ucred *cred, int mode); #endif static struct mqfs_node *mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen, struct ucred *cred, int mode); static int mqfs_destroy(struct mqfs_node *mn); static void mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn); static void mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn); static int mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn); /* * Message queue construction and maniplation */ static struct mqueue *mqueue_alloc(const struct mq_attr *attr); static void mqueue_free(struct mqueue *mq); static int mqueue_send(struct mqueue *mq, const char *msg_ptr, size_t msg_len, unsigned msg_prio, int waitok, const struct timespec *abs_timeout); static int mqueue_receive(struct mqueue *mq, char *msg_ptr, size_t msg_len, unsigned *msg_prio, int waitok, const struct timespec *abs_timeout); static int _mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo); static int _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo); static void mqueue_send_notification(struct mqueue *mq); static void mqueue_fdclose(struct thread *td, int fd, struct file *fp); static void mq_proc_exit(void *arg, struct proc *p); /* * kqueue filters */ static void filt_mqdetach(struct knote *kn); static int filt_mqread(struct knote *kn, long hint); static int filt_mqwrite(struct knote *kn, long hint); struct filterops mq_rfiltops = { 1, NULL, filt_mqdetach, filt_mqread }; struct filterops mq_wfiltops = { 1, NULL, filt_mqdetach, filt_mqwrite }; /* * Initialize fileno bitmap */ static void mqfs_fileno_init(struct mqfs_info *mi) { struct unrhdr *up; up = new_unrhdr(1, INT_MAX, NULL); mi->mi_unrhdr = up; } /* * Tear down fileno bitmap */ static void mqfs_fileno_uninit(struct mqfs_info *mi) { struct unrhdr *up; up = mi->mi_unrhdr; mi->mi_unrhdr = NULL; delete_unrhdr(up); } /* * Allocate a file number */ static void mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn) { /* make sure our parent has a file number */ if (mn->mn_parent && !mn->mn_parent->mn_fileno) mqfs_fileno_alloc(mi, mn->mn_parent); switch (mn->mn_type) { case mqfstype_root: case mqfstype_dir: case mqfstype_file: case mqfstype_symlink: mn->mn_fileno = alloc_unr(mi->mi_unrhdr); break; case mqfstype_this: KASSERT(mn->mn_parent != NULL, ("mqfstype_this node has no parent")); mn->mn_fileno = mn->mn_parent->mn_fileno; break; case mqfstype_parent: KASSERT(mn->mn_parent != NULL, ("mqfstype_parent node has no parent")); if (mn->mn_parent == mi->mi_root) { mn->mn_fileno = mn->mn_parent->mn_fileno; break; } KASSERT(mn->mn_parent->mn_parent != NULL, ("mqfstype_parent node has no grandparent")); mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno; break; default: KASSERT(0, ("mqfs_fileno_alloc() called for unknown type node: %d", mn->mn_type)); break; } } /* * Release a file number */ static void mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn) { switch (mn->mn_type) { case mqfstype_root: case mqfstype_dir: case mqfstype_file: case mqfstype_symlink: free_unr(mi->mi_unrhdr, mn->mn_fileno); break; case mqfstype_this: case mqfstype_parent: /* ignore these, as they don't "own" their file number */ break; default: KASSERT(0, ("mqfs_fileno_free() called for unknown type node: %d", mn->mn_type)); break; } } static __inline struct mqfs_node * mqnode_alloc(void) { return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO); } static __inline void mqnode_free(struct mqfs_node *node) { uma_zfree(mqnode_zone, node); } static __inline void mqnode_addref(struct mqfs_node *node) { atomic_fetchadd_int(&node->mn_refcount, 1); } static __inline void mqnode_release(struct mqfs_node *node) { struct mqfs_info *mqfs; int old, exp; mqfs = node->mn_info; old = atomic_fetchadd_int(&node->mn_refcount, -1); if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root) exp = 3; /* include . and .. */ else exp = 1; if (old == exp) { int locked = sx_xlocked(&mqfs->mi_lock); if (!locked) sx_xlock(&mqfs->mi_lock); mqfs_destroy(node); if (!locked) sx_xunlock(&mqfs->mi_lock); } } /* * Add a node to a directory */ static int mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node) { KASSERT(parent != NULL, ("%s(): parent is NULL", __func__)); KASSERT(parent->mn_info != NULL, ("%s(): parent has no mn_info", __func__)); KASSERT(parent->mn_type == mqfstype_dir || parent->mn_type == mqfstype_root, ("%s(): parent is not a directory", __func__)); node->mn_info = parent->mn_info; node->mn_parent = parent; LIST_INIT(&node->mn_children); LIST_INIT(&node->mn_vnodes); LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling); mqnode_addref(parent); return (0); } static struct mqfs_node * mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode, int nodetype) { struct mqfs_node *node; node = mqnode_alloc(); strncpy(node->mn_name, name, namelen); node->mn_type = nodetype; node->mn_refcount = 1; vfs_timestamp(&node->mn_birth); node->mn_ctime = node->mn_atime = node->mn_mtime = node->mn_birth; node->mn_uid = cred->cr_uid; node->mn_gid = cred->cr_gid; node->mn_mode = mode; return (node); } /* * Create a file */ static struct mqfs_node * mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen, struct ucred *cred, int mode) { struct mqfs_node *node; node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file); if (mqfs_add_node(parent, node) != 0) { mqnode_free(node); return (NULL); } return (node); } /* * Add . and .. to a directory */ static int mqfs_fixup_dir(struct mqfs_node *parent) { struct mqfs_node *dir; dir = mqnode_alloc(); dir->mn_name[0] = '.'; dir->mn_type = mqfstype_this; dir->mn_refcount = 1; if (mqfs_add_node(parent, dir) != 0) { mqnode_free(dir); return (-1); } dir = mqnode_alloc(); dir->mn_name[0] = dir->mn_name[1] = '.'; dir->mn_type = mqfstype_parent; dir->mn_refcount = 1; if (mqfs_add_node(parent, dir) != 0) { mqnode_free(dir); return (-1); } return (0); } #ifdef notyet /* * Create a directory */ static struct mqfs_node * mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen, struct ucred *cred, int mode) { struct mqfs_node *node; node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir); if (mqfs_add_node(parent, node) != 0) { mqnode_free(node); return (NULL); } if (mqfs_fixup_dir(node) != 0) { mqfs_destroy(node); return (NULL); } return (node); } /* * Create a symlink */ static struct mqfs_node * mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen, struct ucred *cred, int mode) { struct mqfs_node *node; node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink); if (mqfs_add_node(parent, node) != 0) { mqnode_free(node); return (NULL); } return (node); } #endif /* * Destroy a node or a tree of nodes */ static int mqfs_destroy(struct mqfs_node *node) { struct mqfs_node *parent; KASSERT(node != NULL, ("%s(): node is NULL", __func__)); KASSERT(node->mn_info != NULL, ("%s(): node has no mn_info", __func__)); /* destroy children */ if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root) while (! LIST_EMPTY(&node->mn_children)) mqfs_destroy(LIST_FIRST(&node->mn_children)); /* unlink from parent */ if ((parent = node->mn_parent) != NULL) { KASSERT(parent->mn_info == node->mn_info, ("%s(): parent has different mn_info", __func__)); LIST_REMOVE(node, mn_sibling); } if (node->mn_fileno != 0) mqfs_fileno_free(node->mn_info, node); if (node->mn_data != NULL) mqueue_free(node->mn_data); mqnode_free(node); return (0); } /* * Mount a mqfs instance */ static int mqfs_mount(struct mount *mp, struct thread *td) { struct statfs *sbp; if (mp->mnt_flag & MNT_UPDATE) return (EOPNOTSUPP); mp->mnt_data = &mqfs_data; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_MPSAFE; MNT_IUNLOCK(mp); vfs_getnewfsid(mp); sbp = &mp->mnt_stat; vfs_mountedfrom(mp, "mqueue"); sbp->f_bsize = PAGE_SIZE; sbp->f_iosize = PAGE_SIZE; sbp->f_blocks = 1; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 1; sbp->f_ffree = 0; return (0); } /* * Unmount a mqfs instance */ static int mqfs_unmount(struct mount *mp, int mntflags, struct thread *td) { int error; error = vflush(mp, 0, (mntflags & MNT_FORCE) ? FORCECLOSE : 0, td); return (error); } /* * Return a root vnode */ static int mqfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) { struct mqfs_info *mqfs; int ret; mqfs = VFSTOMQFS(mp); ret = mqfs_allocv(mp, vpp, mqfs->mi_root); return (ret); } /* * Return filesystem stats */ static int mqfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) { /* XXX update statistics */ return (0); } /* * Initialize a mqfs instance */ static int mqfs_init(struct vfsconf *vfc) { struct mqfs_node *root; struct mqfs_info *mi; mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mvdata_zone = uma_zcreate("mvdata", sizeof(struct mqfs_vdata), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); mi = &mqfs_data; sx_init(&mi->mi_lock, "mqfs lock"); /* set up the root diretory */ root = mqfs_create_node("/", 1, curthread->td_ucred, 01777, mqfstype_root); root->mn_info = mi; LIST_INIT(&root->mn_children); LIST_INIT(&root->mn_vnodes); mi->mi_root = root; mqfs_fileno_init(mi); mqfs_fileno_alloc(mi, root); mqfs_fixup_dir(root); exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL, EVENTHANDLER_PRI_ANY); mq_fdclose = mqueue_fdclose; p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING); return (0); } /* * Destroy a mqfs instance */ static int mqfs_uninit(struct vfsconf *vfc) { struct mqfs_info *mi; if (!unloadable) return (EOPNOTSUPP); EVENTHANDLER_DEREGISTER(process_exit, exit_tag); mi = &mqfs_data; mqfs_destroy(mi->mi_root); mi->mi_root = NULL; mqfs_fileno_uninit(mi); sx_destroy(&mi->mi_lock); uma_zdestroy(mqnode_zone); uma_zdestroy(mqueue_zone); uma_zdestroy(mvdata_zone); uma_zdestroy(mqnoti_zone); return (0); } /* * task routine */ static void do_recycle(void *context, int pending __unused) { struct vnode *vp = (struct vnode *)context; vrecycle(vp, curthread); vdrop(vp); } /* * Allocate a vnode */ static int mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn) { struct mqfs_vdata *vd; struct mqfs_info *mqfs; struct vnode *newvpp; int error; mqfs = pn->mn_info; *vpp = NULL; sx_xlock(&mqfs->mi_lock); LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) { if (vd->mv_vnode->v_mount == mp) { vhold(vd->mv_vnode); break; } } if (vd != NULL) { found: *vpp = vd->mv_vnode; sx_xunlock(&mqfs->mi_lock); error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread); vdrop(*vpp); return (error); } sx_xunlock(&mqfs->mi_lock); error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp); if (error) return (error); vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY); error = insmntque(newvpp, mp); if (error != 0) return (error); sx_xlock(&mqfs->mi_lock); /* * Check if it has already been allocated * while we were blocked. */ LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) { if (vd->mv_vnode->v_mount == mp) { vhold(vd->mv_vnode); sx_xunlock(&mqfs->mi_lock); vgone(newvpp); vput(newvpp); goto found; } } *vpp = newvpp; vd = uma_zalloc(mvdata_zone, M_WAITOK); (*vpp)->v_data = vd; vd->mv_vnode = *vpp; vd->mv_node = pn; TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp); LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link); mqnode_addref(pn); switch (pn->mn_type) { case mqfstype_root: (*vpp)->v_vflag = VV_ROOT; /* fall through */ case mqfstype_dir: case mqfstype_this: case mqfstype_parent: (*vpp)->v_type = VDIR; break; case mqfstype_file: (*vpp)->v_type = VREG; break; case mqfstype_symlink: (*vpp)->v_type = VLNK; break; case mqfstype_none: KASSERT(0, ("mqfs_allocf called for null node\n")); default: panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type); } sx_xunlock(&mqfs->mi_lock); return (0); } /* * Search a directory entry */ static struct mqfs_node * mqfs_search(struct mqfs_node *pd, const char *name, int len) { struct mqfs_node *pn; sx_assert(&pd->mn_info->mi_lock, SX_LOCKED); LIST_FOREACH(pn, &pd->mn_children, mn_sibling) { if (strncmp(pn->mn_name, name, len) == 0) return (pn); } return (NULL); } /* * Look up a file or directory. */ static int mqfs_lookupx(struct vop_cachedlookup_args *ap) { struct componentname *cnp; struct vnode *dvp, **vpp; struct mqfs_node *pd; struct mqfs_node *pn; struct mqfs_info *mqfs; int nameiop, flags, error, namelen; char *pname; struct thread *td; cnp = ap->a_cnp; vpp = ap->a_vpp; dvp = ap->a_dvp; pname = cnp->cn_nameptr; namelen = cnp->cn_namelen; td = cnp->cn_thread; flags = cnp->cn_flags; nameiop = cnp->cn_nameiop; pd = VTON(dvp); pn = NULL; mqfs = pd->mn_info; *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread); if (error) return (error); /* shortcut: check if the name is too long */ if (cnp->cn_namelen >= MQFS_NAMELEN) return (ENOENT); /* self */ if (namelen == 1 && pname[0] == '.') { if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); pn = pd; *vpp = dvp; VREF(dvp); return (0); } /* parent */ if (cnp->cn_flags & ISDOTDOT) { if (dvp->v_vflag & VV_ROOT) return (EIO); if ((flags & ISLASTCN) && nameiop != LOOKUP) return (EINVAL); VOP_UNLOCK(dvp, 0); KASSERT(pd->mn_parent, ("non-root directory has no parent")); pn = pd->mn_parent; error = mqfs_allocv(dvp->v_mount, vpp, pn); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); return (error); } /* named node */ sx_xlock(&mqfs->mi_lock); pn = mqfs_search(pd, pname, namelen); if (pn != NULL) mqnode_addref(pn); sx_xunlock(&mqfs->mi_lock); /* found */ if (pn != NULL) { /* DELETE */ if (nameiop == DELETE && (flags & ISLASTCN)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) { mqnode_release(pn); return (error); } if (*vpp == dvp) { VREF(dvp); *vpp = dvp; mqnode_release(pn); return (0); } } /* allocate vnode */ error = mqfs_allocv(dvp->v_mount, vpp, pn); mqnode_release(pn); if (error == 0 && cnp->cn_flags & MAKEENTRY) cache_enter(dvp, *vpp, cnp); return (error); } /* not found */ /* will create a new entry in the directory ? */ if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT) && (flags & ISLASTCN)) { error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td); if (error) return (error); cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } return (ENOENT); } #if 0 struct vop_lookup_args { struct vop_generic_args a_gen; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; }; #endif /* * vnode lookup operation */ static int mqfs_lookup(struct vop_cachedlookup_args *ap) { int rc; rc = mqfs_lookupx(ap); return (rc); } #if 0 struct vop_create_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; }; #endif /* * vnode creation operation */ static int mqfs_create(struct vop_create_args *ap) { struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount); struct componentname *cnp = ap->a_cnp; struct mqfs_node *pd; struct mqfs_node *pn; struct mqueue *mq; int error; pd = VTON(ap->a_dvp); if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir) return (ENOTDIR); mq = mqueue_alloc(NULL); if (mq == NULL) return (EAGAIN); sx_xlock(&mqfs->mi_lock); if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", __func__); pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, ap->a_vap->va_mode); if (pn == NULL) { sx_xunlock(&mqfs->mi_lock); error = ENOSPC; } else { mqnode_addref(pn); sx_xunlock(&mqfs->mi_lock); error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn); mqnode_release(pn); if (error) mqfs_destroy(pn); else pn->mn_data = mq; } if (error) mqueue_free(mq); return (error); } /* * Remove an entry */ static int do_unlink(struct mqfs_node *pn, struct ucred *ucred) { struct mqfs_node *parent; struct mqfs_vdata *vd; int error = 0; sx_assert(&pn->mn_info->mi_lock, SX_LOCKED); if (ucred->cr_uid != pn->mn_uid && (error = priv_check_cred(ucred, PRIV_MQ_ADMIN, 0)) != 0) error = EACCES; else if (!pn->mn_deleted) { parent = pn->mn_parent; pn->mn_parent = NULL; pn->mn_deleted = 1; LIST_REMOVE(pn, mn_sibling); LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) { cache_purge(vd->mv_vnode); vhold(vd->mv_vnode); taskqueue_enqueue(taskqueue_thread, &vd->mv_task); } mqnode_release(pn); mqnode_release(parent); } else error = ENOENT; return (error); } #if 0 struct vop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; }; #endif /* * vnode removal operation */ static int mqfs_remove(struct vop_remove_args *ap) { struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount); struct mqfs_node *pn; int error; if (ap->a_vp->v_type == VDIR) return (EPERM); pn = VTON(ap->a_vp); sx_xlock(&mqfs->mi_lock); error = do_unlink(pn, ap->a_cnp->cn_cred); sx_xunlock(&mqfs->mi_lock); return (error); } #if 0 struct vop_inactive_args { struct vnode *a_vp; struct thread *a_td; }; #endif static int mqfs_inactive(struct vop_inactive_args *ap) { struct mqfs_node *pn = VTON(ap->a_vp); if (pn->mn_deleted) vrecycle(ap->a_vp, ap->a_td); return (0); } #if 0 struct vop_reclaim_args { struct vop_generic_args a_gen; struct vnode *a_vp; struct thread *a_td; }; #endif static int mqfs_reclaim(struct vop_reclaim_args *ap) { struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount); struct vnode *vp = ap->a_vp; struct mqfs_node *pn; struct mqfs_vdata *vd; vd = vp->v_data; pn = vd->mv_node; sx_xlock(&mqfs->mi_lock); vp->v_data = NULL; LIST_REMOVE(vd, mv_link); uma_zfree(mvdata_zone, vd); mqnode_release(pn); sx_xunlock(&mqfs->mi_lock); return (0); } #if 0 struct vop_open_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; int a_fdidx; }; #endif static int mqfs_open(struct vop_open_args *ap) { return (0); } #if 0 struct vop_close_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; }; #endif static int mqfs_close(struct vop_close_args *ap) { return (0); } #if 0 struct vop_access_args { struct vop_generic_args a_gen; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; }; #endif /* * Verify permissions */ static int mqfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; struct vattr vattr; int error; error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return (error); error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid, vattr.va_gid, ap->a_mode, ap->a_cred, NULL); return (error); } #if 0 struct vop_getattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; }; #endif /* * Get file attributes */ static int mqfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct mqfs_node *pn = VTON(vp); struct vattr *vap = ap->a_vap; int error = 0; - VATTR_NULL(vap); vap->va_type = vp->v_type; vap->va_mode = pn->mn_mode; vap->va_nlink = 1; vap->va_uid = pn->mn_uid; vap->va_gid = pn->mn_gid; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_fileid = pn->mn_fileno; vap->va_size = 0; vap->va_blocksize = PAGE_SIZE; vap->va_bytes = vap->va_size = 0; vap->va_atime = pn->mn_atime; vap->va_mtime = pn->mn_mtime; vap->va_ctime = pn->mn_ctime; vap->va_birthtime = pn->mn_birth; vap->va_gen = 0; vap->va_flags = 0; vap->va_rdev = NODEV; vap->va_bytes = 0; vap->va_filerev = 0; return (error); } #if 0 struct vop_setattr_args { struct vop_generic_args a_gen; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; }; #endif /* * Set attributes */ static int mqfs_setattr(struct vop_setattr_args *ap) { struct mqfs_node *pn; struct vattr *vap; struct vnode *vp; struct thread *td; int c, error; uid_t uid; gid_t gid; td = curthread; vap = ap->a_vap; vp = ap->a_vp; if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_flags != VNOVAL && vap->va_flags != 0) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } pn = VTON(vp); error = c = 0; if (vap->va_uid == (uid_t)VNOVAL) uid = pn->mn_uid; else uid = vap->va_uid; if (vap->va_gid == (gid_t)VNOVAL) gid = pn->mn_gid; else gid = vap->va_gid; if (uid != pn->mn_uid || gid != pn->mn_gid) { /* * To modify the ownership of a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td))) return (error); /* * XXXRW: Why is there a privilege check here: shouldn't the * check in VOP_ACCESS() be enough? Also, are the group bits * below definitely right? */ if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid || (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) && (error = priv_check(td, PRIV_MQ_ADMIN)) != 0) return (error); pn->mn_uid = uid; pn->mn_gid = gid; c = 1; } if (vap->va_mode != (mode_t)VNOVAL) { if ((ap->a_cred->cr_uid != pn->mn_uid) && (error = priv_check(td, PRIV_MQ_ADMIN))) return (error); pn->mn_mode = vap->va_mode; c = 1; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { /* See the comment in ufs_vnops::ufs_setattr(). */ if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) { pn->mn_atime = vap->va_atime; } if (vap->va_mtime.tv_sec != VNOVAL) { pn->mn_mtime = vap->va_mtime; } c = 1; } if (c) { vfs_timestamp(&pn->mn_ctime); } return (0); } #if 0 struct vop_read_args { struct vop_generic_args a_gen; struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; }; #endif /* * Read from a file */ static int mqfs_read(struct vop_read_args *ap) { char buf[80]; struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct mqfs_node *pn; struct mqueue *mq; int len, error; if (vp->v_type != VREG) return (EINVAL); pn = VTON(vp); mq = VTOMQ(vp); snprintf(buf, sizeof(buf), "QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n", mq->mq_totalbytes, mq->mq_maxmsg, mq->mq_curmsgs, mq->mq_msgsize); buf[sizeof(buf)-1] = '\0'; len = strlen(buf); error = uiomove_frombuf(buf, len, uio); return (error); } #if 0 struct vop_readdir_args { struct vop_generic_args a_gen; struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; }; #endif /* * Return directory entries. */ static int mqfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp; struct mqfs_info *mi; struct mqfs_node *pd; struct mqfs_node *pn; struct dirent entry; struct uio *uio; int *tmp_ncookies = NULL; off_t offset; int error, i; vp = ap->a_vp; mi = VFSTOMQFS(vp->v_mount); pd = VTON(vp); uio = ap->a_uio; if (vp->v_type != VDIR) return (ENOTDIR); if (uio->uio_offset < 0) return (EINVAL); if (ap->a_ncookies != NULL) { tmp_ncookies = ap->a_ncookies; *ap->a_ncookies = 0; ap->a_ncookies = NULL; } error = 0; offset = 0; sx_xlock(&mi->mi_lock); LIST_FOREACH(pn, &pd->mn_children, mn_sibling) { entry.d_reclen = sizeof(entry); if (!pn->mn_fileno) mqfs_fileno_alloc(mi, pn); entry.d_fileno = pn->mn_fileno; for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i) entry.d_name[i] = pn->mn_name[i]; entry.d_name[i] = 0; entry.d_namlen = i; switch (pn->mn_type) { case mqfstype_root: case mqfstype_dir: case mqfstype_this: case mqfstype_parent: entry.d_type = DT_DIR; break; case mqfstype_file: entry.d_type = DT_REG; break; case mqfstype_symlink: entry.d_type = DT_LNK; break; default: panic("%s has unexpected node type: %d", pn->mn_name, pn->mn_type); } if (entry.d_reclen > uio->uio_resid) break; if (offset >= uio->uio_offset) { error = vfs_read_dirent(ap, &entry, offset); if (error) break; } offset += entry.d_reclen; } sx_xunlock(&mi->mi_lock); uio->uio_offset = offset; if (tmp_ncookies != NULL) ap->a_ncookies = tmp_ncookies; return (error); } #ifdef notyet #if 0 struct vop_mkdir_args { struct vnode *a_dvp; struvt vnode **a_vpp; struvt componentname *a_cnp; struct vattr *a_vap; }; #endif /* * Create a directory. */ static int mqfs_mkdir(struct vop_mkdir_args *ap) { struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount); struct componentname *cnp = ap->a_cnp; struct mqfs_node *pd = VTON(ap->a_dvp); struct mqfs_node *pn; int error; if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir) return (ENOTDIR); sx_xlock(&mqfs->mi_lock); if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", __func__); pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen, ap->a_vap->cn_cred, ap->a_vap->va_mode); if (pn != NULL) mqnode_addref(pn); sx_xunlock(&mqfs->mi_lock); if (pn == NULL) { error = ENOSPC; } else { error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn); mqnode_release(pn); } return (error); } #if 0 struct vop_rmdir_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; }; #endif /* * Remove a directory. */ static int mqfs_rmdir(struct vop_rmdir_args *ap) { struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount); struct mqfs_node *pn = VTON(ap->a_vp); struct mqfs_node *pt; if (pn->mn_type != mqfstype_dir) return (ENOTDIR); sx_xlock(&mqfs->mi_lock); if (pn->mn_deleted) { sx_xunlock(&mqfs->mi_lock); return (ENOENT); } pt = LIST_FIRST(&pn->mn_children); pt = LIST_NEXT(pt, mn_sibling); pt = LIST_NEXT(pt, mn_sibling); if (pt != NULL) { sx_xunlock(&mqfs->mi_lock); return (ENOTEMPTY); } pt = pn->mn_parent; pn->mn_parent = NULL; pn->mn_deleted = 1; LIST_REMOVE(pn, mn_sibling); mqnode_release(pn); mqnode_release(pt); sx_xunlock(&mqfs->mi_lock); cache_purge(ap->a_vp); return (0); } #endif /* notyet */ /* * Allocate a message queue */ static struct mqueue * mqueue_alloc(const struct mq_attr *attr) { struct mqueue *mq; if (curmq >= maxmq) return (NULL); mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO); TAILQ_INIT(&mq->mq_msgq); if (attr != NULL) { mq->mq_maxmsg = attr->mq_maxmsg; mq->mq_msgsize = attr->mq_msgsize; } else { mq->mq_maxmsg = default_maxmsg; mq->mq_msgsize = default_msgsize; } mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF); knlist_init(&mq->mq_rsel.si_note, &mq->mq_mutex, NULL, NULL, NULL); knlist_init(&mq->mq_wsel.si_note, &mq->mq_mutex, NULL, NULL, NULL); atomic_add_int(&curmq, 1); return (mq); } /* * Destroy a message queue */ static void mqueue_free(struct mqueue *mq) { struct mqueue_msg *msg; while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) { TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link); FREE(msg, M_MQUEUEDATA); } mtx_destroy(&mq->mq_mutex); knlist_destroy(&mq->mq_rsel.si_note); knlist_destroy(&mq->mq_wsel.si_note); uma_zfree(mqueue_zone, mq); atomic_add_int(&curmq, -1); } /* * Load a message from user space */ static struct mqueue_msg * mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio) { struct mqueue_msg *msg; size_t len; int error; len = sizeof(struct mqueue_msg) + msg_size; MALLOC(msg, struct mqueue_msg *, len, M_MQUEUEDATA, M_WAITOK); error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg), msg_size); if (error) { FREE(msg, M_MQUEUEDATA); msg = NULL; } else { msg->msg_size = msg_size; msg->msg_prio = msg_prio; } return (msg); } /* * Save a message to user space */ static int mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio) { int error; error = copyout(((char *)msg) + sizeof(*msg), msg_ptr, msg->msg_size); if (error == 0 && msg_prio != NULL) error = copyout(&msg->msg_prio, msg_prio, sizeof(int)); return (error); } /* * Free a message's memory */ static __inline void mqueue_freemsg(struct mqueue_msg *msg) { FREE(msg, M_MQUEUEDATA); } /* * Send a message. if waitok is false, thread will not be * blocked if there is no data in queue, otherwise, absolute * time will be checked. */ int mqueue_send(struct mqueue *mq, const char *msg_ptr, size_t msg_len, unsigned msg_prio, int waitok, const struct timespec *abs_timeout) { struct mqueue_msg *msg; struct timespec ets, ts, ts2; struct timeval tv; int error; if (msg_prio >= MQ_PRIO_MAX) return (EINVAL); if (msg_len > mq->mq_msgsize) return (EMSGSIZE); msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio); if (msg == NULL) return (EFAULT); /* O_NONBLOCK case */ if (!waitok) { error = _mqueue_send(mq, msg, -1); if (error) goto bad; return (0); } /* we allow a null timeout (wait forever) */ if (abs_timeout == NULL) { error = _mqueue_send(mq, msg, 0); if (error) goto bad; return (0); } /* send it before checking time */ error = _mqueue_send(mq, msg, -1); if (error == 0) return (0); if (error != EAGAIN) goto bad; error = copyin(abs_timeout, &ets, sizeof(ets)); if (error != 0) goto bad; if (ets.tv_nsec >= 1000000000 || ets.tv_nsec < 0) { error = EINVAL; goto bad; } for (;;) { ts2 = ets; getnanotime(&ts); timespecsub(&ts2, &ts); if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) { error = ETIMEDOUT; break; } TIMESPEC_TO_TIMEVAL(&tv, &ts2); error = _mqueue_send(mq, msg, tvtohz(&tv)); if (error != ETIMEDOUT) break; } if (error == 0) return (0); bad: mqueue_freemsg(msg); return (error); } /* * Common routine to send a message */ static int _mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo) { struct mqueue_msg *msg2; int error = 0; mtx_lock(&mq->mq_mutex); while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) { if (timo < 0) { mtx_unlock(&mq->mq_mutex); return (EAGAIN); } mq->mq_senders++; error = msleep(&mq->mq_senders, &mq->mq_mutex, PCATCH, "mqsend", timo); mq->mq_senders--; if (error == EAGAIN) error = ETIMEDOUT; } if (mq->mq_curmsgs >= mq->mq_maxmsg) { mtx_unlock(&mq->mq_mutex); return (error); } error = 0; if (TAILQ_EMPTY(&mq->mq_msgq)) { TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link); } else { if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) { TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link); } else { TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) { if (msg2->msg_prio < msg->msg_prio) break; } TAILQ_INSERT_BEFORE(msg2, msg, msg_link); } } mq->mq_curmsgs++; mq->mq_totalbytes += msg->msg_size; if (mq->mq_receivers) wakeup_one(&mq->mq_receivers); else if (mq->mq_notifier != NULL) mqueue_send_notification(mq); if (mq->mq_flags & MQ_RSEL) { mq->mq_flags &= ~MQ_RSEL; selwakeup(&mq->mq_rsel); } KNOTE_LOCKED(&mq->mq_rsel.si_note, 0); mtx_unlock(&mq->mq_mutex); return (0); } /* * Send realtime a signal to process which registered itself * successfully by mq_notify. */ static void mqueue_send_notification(struct mqueue *mq) { struct mqueue_notifier *nt; struct proc *p; mtx_assert(&mq->mq_mutex, MA_OWNED); nt = mq->mq_notifier; if (nt->nt_sigev.sigev_notify != SIGEV_NONE) { p = nt->nt_proc; PROC_LOCK(p); if (!KSI_ONQ(&nt->nt_ksi)) psignal_event(p, &nt->nt_sigev, &nt->nt_ksi); PROC_UNLOCK(p); } mq->mq_notifier = NULL; } /* * Get a message. if waitok is false, thread will not be * blocked if there is no data in queue, otherwise, absolute * time will be checked. */ int mqueue_receive(struct mqueue *mq, char *msg_ptr, size_t msg_len, unsigned *msg_prio, int waitok, const struct timespec *abs_timeout) { struct mqueue_msg *msg; struct timespec ets, ts, ts2; struct timeval tv; int error; if (msg_len < mq->mq_msgsize) return (EMSGSIZE); /* O_NONBLOCK case */ if (!waitok) { error = _mqueue_recv(mq, &msg, -1); if (error) return (error); goto received; } /* we allow a null timeout (wait forever). */ if (abs_timeout == NULL) { error = _mqueue_recv(mq, &msg, 0); if (error) return (error); goto received; } /* try to get a message before checking time */ error = _mqueue_recv(mq, &msg, -1); if (error == 0) goto received; if (error != EAGAIN) return (error); error = copyin(abs_timeout, &ets, sizeof(ets)); if (error != 0) return (error); if (ets.tv_nsec >= 1000000000 || ets.tv_nsec < 0) { error = EINVAL; return (error); } for (;;) { ts2 = ets; getnanotime(&ts); timespecsub(&ts2, &ts); if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) { error = ETIMEDOUT; return (error); } TIMESPEC_TO_TIMEVAL(&tv, &ts2); error = _mqueue_recv(mq, &msg, tvtohz(&tv)); if (error == 0) break; if (error != ETIMEDOUT) return (error); } received: error = mqueue_savemsg(msg, msg_ptr, msg_prio); if (error == 0) { curthread->td_retval[0] = msg->msg_size; curthread->td_retval[1] = 0; } mqueue_freemsg(msg); return (error); } /* * Common routine to receive a message */ static int _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo) { int error = 0; mtx_lock(&mq->mq_mutex); while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) { if (timo < 0) { mtx_unlock(&mq->mq_mutex); return (EAGAIN); } mq->mq_receivers++; error = msleep(&mq->mq_receivers, &mq->mq_mutex, PCATCH, "mqrecv", timo); mq->mq_receivers--; if (error == EAGAIN) error = ETIMEDOUT; } if (*msg != NULL) { error = 0; TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link); mq->mq_curmsgs--; mq->mq_totalbytes -= (*msg)->msg_size; if (mq->mq_senders) wakeup_one(&mq->mq_senders); if (mq->mq_flags & MQ_WSEL) { mq->mq_flags &= ~MQ_WSEL; selwakeup(&mq->mq_wsel); } KNOTE_LOCKED(&mq->mq_wsel.si_note, 0); } if (mq->mq_notifier != NULL && mq->mq_receivers == 0 && !TAILQ_EMPTY(&mq->mq_msgq)) { mqueue_send_notification(mq); } mtx_unlock(&mq->mq_mutex); return (error); } static __inline struct mqueue_notifier * notifier_alloc(void) { return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO)); } static __inline void notifier_free(struct mqueue_notifier *p) { uma_zfree(mqnoti_zone, p); } static struct mqueue_notifier * notifier_search(struct proc *p, int fd) { struct mqueue_notifier *nt; LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) { if (nt->nt_ksi.ksi_mqd == fd) break; } return (nt); } static __inline void notifier_insert(struct proc *p, struct mqueue_notifier *nt) { LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link); } static __inline void notifier_delete(struct proc *p, struct mqueue_notifier *nt) { LIST_REMOVE(nt, nt_link); notifier_free(nt); } static void notifier_remove(struct proc *p, struct mqueue *mq, int fd) { struct mqueue_notifier *nt; mtx_assert(&mq->mq_mutex, MA_OWNED); PROC_LOCK(p); nt = notifier_search(p, fd); if (nt != NULL) { if (mq->mq_notifier == nt) mq->mq_notifier = NULL; sigqueue_take(&nt->nt_ksi); notifier_delete(p, nt); } PROC_UNLOCK(p); } /* * Syscall to open a message queue. */ int kmq_open(struct thread *td, struct kmq_open_args *uap) { char path[MQFS_NAMELEN + 1]; struct mq_attr attr, *pattr; struct mqfs_node *pn; struct filedesc *fdp; struct file *fp; struct mqueue *mq; int fd, error, len, flags, cmode; if ((uap->flags & O_ACCMODE) == O_ACCMODE) return (EINVAL); fdp = td->td_proc->p_fd; flags = FFLAGS(uap->flags); cmode = (((uap->mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT); mq = NULL; if ((flags & O_CREAT) && (uap->attr != NULL)) { error = copyin(uap->attr, &attr, sizeof(attr)); if (error) return (error); if (attr.mq_maxmsg <= 0 || attr.mq_maxmsg > maxmsg) return (EINVAL); if (attr.mq_msgsize <= 0 || attr.mq_msgsize > maxmsgsize) return (EINVAL); pattr = &attr; } else pattr = NULL; error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL); if (error) return (error); /* * The first character of name must be a slash (/) character * and the remaining characters of name cannot include any slash * characters. */ len = strlen(path); if (len < 2 || path[0] != '/' || index(path + 1, '/') != NULL) return (EINVAL); error = falloc(td, &fp, &fd); if (error) return (error); sx_xlock(&mqfs_data.mi_lock); pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1); if (pn == NULL) { if (!(flags & O_CREAT)) { error = ENOENT; } else { mq = mqueue_alloc(pattr); if (mq == NULL) { error = ENFILE; } else { pn = mqfs_create_file(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred, cmode); if (pn == NULL) { error = ENOSPC; mqueue_free(mq); } } } if (error == 0) { pn->mn_data = mq; } } else { if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) { error = EEXIST; } else { int acc_mode = 0; if (flags & FREAD) acc_mode |= VREAD; if (flags & FWRITE) acc_mode |= VWRITE; error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, acc_mode, td->td_ucred, NULL); } } if (error) { sx_xunlock(&mqfs_data.mi_lock); fdclose(fdp, fp, fd, td); fdrop(fp, td); return (error); } mqnode_addref(pn); sx_xunlock(&mqfs_data.mi_lock); finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn, &mqueueops); FILEDESC_XLOCK(fdp); if (fdp->fd_ofiles[fd] == fp) fdp->fd_ofileflags[fd] |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); return (0); } /* * Syscall to unlink a message queue. */ int kmq_unlink(struct thread *td, struct kmq_unlink_args *uap) { char path[MQFS_NAMELEN+1]; struct mqfs_node *pn; int error, len; error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL); if (error) return (error); len = strlen(path); if (len < 2 || path[0] != '/' || index(path + 1, '/') != NULL) return (EINVAL); sx_xlock(&mqfs_data.mi_lock); pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1); if (pn != NULL) error = do_unlink(pn, td->td_ucred); else error = ENOENT; sx_xunlock(&mqfs_data.mi_lock); return (error); } typedef int (*_fgetf)(struct thread *, int, struct file **); /* * Get message queue by giving file slot */ static int _getmq(struct thread *td, int fd, _fgetf func, struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq) { struct mqfs_node *pn; int error; error = func(td, fd, fpp); if (error) return (error); if (&mqueueops != (*fpp)->f_ops) { fdrop(*fpp, td); return (EBADF); } pn = (*fpp)->f_data; if (ppn) *ppn = pn; if (pmq) *pmq = pn->mn_data; return (0); } static __inline int getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq) { return _getmq(td, fd, fget, fpp, ppn, pmq); } static __inline int getmq_read(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq) { return _getmq(td, fd, fget_read, fpp, ppn, pmq); } static __inline int getmq_write(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq) { return _getmq(td, fd, fget_write, fpp, ppn, pmq); } int kmq_setattr(struct thread *td, struct kmq_setattr_args *uap) { struct mqueue *mq; struct file *fp; struct mq_attr attr, oattr; u_int oflag, flag; int error; if (uap->attr) { error = copyin(uap->attr, &attr, sizeof(attr)); if (error) return (error); if (attr.mq_flags & ~O_NONBLOCK) return (EINVAL); } error = getmq(td, uap->mqd, &fp, NULL, &mq); if (error) return (error); oattr.mq_maxmsg = mq->mq_maxmsg; oattr.mq_msgsize = mq->mq_msgsize; oattr.mq_curmsgs = mq->mq_curmsgs; if (uap->attr) { do { oflag = flag = fp->f_flag; flag &= ~O_NONBLOCK; flag |= (attr.mq_flags & O_NONBLOCK); } while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0); } else oflag = fp->f_flag; oattr.mq_flags = (O_NONBLOCK & oflag); fdrop(fp, td); if (uap->oattr) error = copyout(&oattr, uap->oattr, sizeof(oattr)); return (error); } int kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap) { struct mqueue *mq; struct file *fp; int error; int waitok; error = getmq_read(td, uap->mqd, &fp, NULL, &mq); if (error) return (error); waitok = !(fp->f_flag & O_NONBLOCK); error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len, uap->msg_prio, waitok, uap->abs_timeout); fdrop(fp, td); return (error); } int kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap) { struct mqueue *mq; struct file *fp; int error, waitok; error = getmq_write(td, uap->mqd, &fp, NULL, &mq); if (error) return (error); waitok = !(fp->f_flag & O_NONBLOCK); error = mqueue_send(mq, uap->msg_ptr, uap->msg_len, uap->msg_prio, waitok, uap->abs_timeout); fdrop(fp, td); return (error); } int kmq_notify(struct thread *td, struct kmq_notify_args *uap) { struct sigevent ev; struct filedesc *fdp; struct proc *p; struct mqueue *mq; struct file *fp; struct mqueue_notifier *nt, *newnt = NULL; int error; p = td->td_proc; fdp = td->td_proc->p_fd; if (uap->sigev) { error = copyin(uap->sigev, &ev, sizeof(ev)); if (error) return (error); if (ev.sigev_notify != SIGEV_SIGNAL && ev.sigev_notify != SIGEV_THREAD_ID && ev.sigev_notify != SIGEV_NONE) return (EINVAL); if ((ev.sigev_notify == SIGEV_SIGNAL || ev.sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(ev.sigev_signo)) return (EINVAL); } error = getmq(td, uap->mqd, &fp, NULL, &mq); if (error) return (error); again: FILEDESC_SLOCK(fdp); if (fget_locked(fdp, uap->mqd) != fp) { FILEDESC_SUNLOCK(fdp); error = EBADF; goto out; } mtx_lock(&mq->mq_mutex); FILEDESC_SUNLOCK(fdp); if (uap->sigev != NULL) { if (mq->mq_notifier != NULL) { error = EBUSY; } else { PROC_LOCK(p); nt = notifier_search(p, uap->mqd); if (nt == NULL) { if (newnt == NULL) { PROC_UNLOCK(p); mtx_unlock(&mq->mq_mutex); newnt = notifier_alloc(); goto again; } } if (nt != NULL) { sigqueue_take(&nt->nt_ksi); if (newnt != NULL) { notifier_free(newnt); newnt = NULL; } } else { nt = newnt; newnt = NULL; ksiginfo_init(&nt->nt_ksi); nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT; nt->nt_ksi.ksi_code = SI_MESGQ; nt->nt_proc = p; nt->nt_ksi.ksi_mqd = uap->mqd; notifier_insert(p, nt); } nt->nt_sigev = ev; mq->mq_notifier = nt; PROC_UNLOCK(p); /* * if there is no receivers and message queue * is not empty, we should send notification * as soon as possible. */ if (mq->mq_receivers == 0 && !TAILQ_EMPTY(&mq->mq_msgq)) mqueue_send_notification(mq); } } else { notifier_remove(p, mq, uap->mqd); } mtx_unlock(&mq->mq_mutex); out: fdrop(fp, td); if (newnt != NULL) notifier_free(newnt); return (error); } static void mqueue_fdclose(struct thread *td, int fd, struct file *fp) { struct filedesc *fdp; struct mqueue *mq; fdp = td->td_proc->p_fd; FILEDESC_LOCK_ASSERT(fdp); if (fp->f_ops == &mqueueops) { mq = FPTOMQ(fp); mtx_lock(&mq->mq_mutex); notifier_remove(td->td_proc, mq, fd); /* have to wakeup thread in same process */ if (mq->mq_flags & MQ_RSEL) { mq->mq_flags &= ~MQ_RSEL; selwakeup(&mq->mq_rsel); } if (mq->mq_flags & MQ_WSEL) { mq->mq_flags &= ~MQ_WSEL; selwakeup(&mq->mq_wsel); } mtx_unlock(&mq->mq_mutex); } } static void mq_proc_exit(void *arg __unused, struct proc *p) { struct filedesc *fdp; struct file *fp; struct mqueue *mq; int i; fdp = p->p_fd; FILEDESC_SLOCK(fdp); for (i = 0; i < fdp->fd_nfiles; ++i) { fp = fget_locked(fdp, i); if (fp != NULL && fp->f_ops == &mqueueops) { mq = FPTOMQ(fp); mtx_lock(&mq->mq_mutex); notifier_remove(p, FPTOMQ(fp), i); mtx_unlock(&mq->mq_mutex); } } FILEDESC_SUNLOCK(fdp); KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left")); } static int mqf_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EOPNOTSUPP); } static int mqf_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { return (EOPNOTSUPP); } static int mqf_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { return (EINVAL); } static int mqf_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { return (ENOTTY); } static int mqf_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct mqueue *mq = FPTOMQ(fp); int revents = 0; mtx_lock(&mq->mq_mutex); if (events & (POLLIN | POLLRDNORM)) { if (mq->mq_curmsgs) { revents |= events & (POLLIN | POLLRDNORM); } else { mq->mq_flags |= MQ_RSEL; selrecord(td, &mq->mq_rsel); } } if (events & POLLOUT) { if (mq->mq_curmsgs < mq->mq_maxmsg) revents |= POLLOUT; else { mq->mq_flags |= MQ_WSEL; selrecord(td, &mq->mq_wsel); } } mtx_unlock(&mq->mq_mutex); return (revents); } static int mqf_close(struct file *fp, struct thread *td) { struct mqfs_node *pn; fp->f_ops = &badfileops; pn = fp->f_data; fp->f_data = NULL; sx_xlock(&mqfs_data.mi_lock); mqnode_release(pn); sx_xunlock(&mqfs_data.mi_lock); return (0); } static int mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred, struct thread *td) { struct mqfs_node *pn = fp->f_data; bzero(st, sizeof *st); st->st_atimespec = pn->mn_atime; st->st_mtimespec = pn->mn_mtime; st->st_ctimespec = pn->mn_ctime; st->st_birthtimespec = pn->mn_birth; st->st_uid = pn->mn_uid; st->st_gid = pn->mn_gid; st->st_mode = S_IFIFO | pn->mn_mode; return (0); } static int mqf_kqfilter(struct file *fp, struct knote *kn) { struct mqueue *mq = FPTOMQ(fp); int error = 0; if (kn->kn_filter == EVFILT_READ) { kn->kn_fop = &mq_rfiltops; knlist_add(&mq->mq_rsel.si_note, kn, 0); } else if (kn->kn_filter == EVFILT_WRITE) { kn->kn_fop = &mq_wfiltops; knlist_add(&mq->mq_wsel.si_note, kn, 0); } else error = EINVAL; return (error); } static void filt_mqdetach(struct knote *kn) { struct mqueue *mq = FPTOMQ(kn->kn_fp); if (kn->kn_filter == EVFILT_READ) knlist_remove(&mq->mq_rsel.si_note, kn, 0); else if (kn->kn_filter == EVFILT_WRITE) knlist_remove(&mq->mq_wsel.si_note, kn, 0); else panic("filt_mqdetach"); } static int filt_mqread(struct knote *kn, long hint) { struct mqueue *mq = FPTOMQ(kn->kn_fp); mtx_assert(&mq->mq_mutex, MA_OWNED); return (mq->mq_curmsgs != 0); } static int filt_mqwrite(struct knote *kn, long hint) { struct mqueue *mq = FPTOMQ(kn->kn_fp); mtx_assert(&mq->mq_mutex, MA_OWNED); return (mq->mq_curmsgs < mq->mq_maxmsg); } static struct fileops mqueueops = { .fo_read = mqf_read, .fo_write = mqf_write, .fo_truncate = mqf_truncate, .fo_ioctl = mqf_ioctl, .fo_poll = mqf_poll, .fo_kqfilter = mqf_kqfilter, .fo_stat = mqf_stat, .fo_close = mqf_close }; static struct vop_vector mqfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = mqfs_access, .vop_cachedlookup = mqfs_lookup, .vop_lookup = vfs_cache_lookup, .vop_reclaim = mqfs_reclaim, .vop_create = mqfs_create, .vop_remove = mqfs_remove, .vop_inactive = mqfs_inactive, .vop_open = mqfs_open, .vop_close = mqfs_close, .vop_getattr = mqfs_getattr, .vop_setattr = mqfs_setattr, .vop_read = mqfs_read, .vop_write = VOP_EOPNOTSUPP, .vop_readdir = mqfs_readdir, .vop_mkdir = VOP_EOPNOTSUPP, .vop_rmdir = VOP_EOPNOTSUPP }; static struct vfsops mqfs_vfsops = { .vfs_init = mqfs_init, .vfs_uninit = mqfs_uninit, .vfs_mount = mqfs_mount, .vfs_unmount = mqfs_unmount, .vfs_root = mqfs_root, .vfs_statfs = mqfs_statfs, }; SYSCALL_MODULE_HELPER(kmq_open); SYSCALL_MODULE_HELPER(kmq_setattr); SYSCALL_MODULE_HELPER(kmq_timedsend); SYSCALL_MODULE_HELPER(kmq_timedreceive); SYSCALL_MODULE_HELPER(kmq_notify); SYSCALL_MODULE_HELPER(kmq_unlink); VFS_SET(mqfs_vfsops, mqueuefs, VFCF_SYNTHETIC); MODULE_VERSION(mqueuefs, 1); Index: head/sys/nfsclient/nfs_vnops.c =================================================================== --- head/sys/nfsclient/nfs_vnops.c (revision 183214) +++ head/sys/nfsclient/nfs_vnops.c (revision 183215) @@ -1,3300 +1,3320 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. */ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static vop_read_t nfsfifo_read; static vop_write_t nfsfifo_write; static vop_close_t nfsfifo_close; static int nfs_flush(struct vnode *, int, int); static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *); static vop_lookup_t nfs_lookup; static vop_create_t nfs_create; static vop_mknod_t nfs_mknod; static vop_open_t nfs_open; static vop_close_t nfs_close; static vop_access_t nfs_access; static vop_getattr_t nfs_getattr; static vop_setattr_t nfs_setattr; static vop_read_t nfs_read; static vop_fsync_t nfs_fsync; static vop_remove_t nfs_remove; static vop_link_t nfs_link; static vop_rename_t nfs_rename; static vop_mkdir_t nfs_mkdir; static vop_rmdir_t nfs_rmdir; static vop_symlink_t nfs_symlink; static vop_readdir_t nfs_readdir; static vop_strategy_t nfs_strategy; static int nfs_lookitup(struct vnode *, const char *, int, struct ucred *, struct thread *, struct nfsnode **); static int nfs_sillyrename(struct vnode *, struct vnode *, struct componentname *); static vop_access_t nfsspec_access; static vop_readlink_t nfs_readlink; static vop_print_t nfs_print; static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; /* * Global vfs data structures for nfs */ struct vop_vector nfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nfs_access, .vop_advlock = nfs_advlock, .vop_advlockasync = nfs_advlockasync, .vop_close = nfs_close, .vop_create = nfs_create, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_getpages = nfs_getpages, .vop_putpages = nfs_putpages, .vop_inactive = nfs_inactive, .vop_lease = VOP_NULL, .vop_link = nfs_link, .vop_lookup = nfs_lookup, .vop_mkdir = nfs_mkdir, .vop_mknod = nfs_mknod, .vop_open = nfs_open, .vop_print = nfs_print, .vop_read = nfs_read, .vop_readdir = nfs_readdir, .vop_readlink = nfs_readlink, .vop_reclaim = nfs_reclaim, .vop_remove = nfs_remove, .vop_rename = nfs_rename, .vop_rmdir = nfs_rmdir, .vop_setattr = nfs_setattr, .vop_strategy = nfs_strategy, .vop_symlink = nfs_symlink, .vop_write = nfs_write, }; struct vop_vector nfs_fifoops = { .vop_default = &fifo_specops, .vop_access = nfsspec_access, .vop_close = nfsfifo_close, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = nfs_inactive, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = nfs_reclaim, .vop_setattr = nfs_setattr, .vop_write = nfsfifo_write, }; static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, struct ucred *cred, struct thread *td); static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp); /* * Global variables */ struct mtx nfs_iod_mtx; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; vop_advlock_t *nfs_advlock_p = nfs_dolock; vop_reclaim_t *nfs_reclaim_p = NULL; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfsv3_commit_on_close = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, &nfsv3_commit_on_close, 0, "write+commit on close, else only write"); static int nfs_clean_pages_on_close = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); int nfs_directio_enable = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, &nfs_directio_enable, 0, "Enable NFS directio"); /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while * Direct IO is in progress can lead to Data Inconsistencies. But, we allow * this by default to prevent DoS attacks - to prevent a malicious user from * opening up files O_DIRECT preventing other users from mmap'ing these * files. "Protected" environments where stricter consistency guarantees are * required can disable this knob. The process that opened the file O_DIRECT * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not * meaningful. */ int nfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); #if 0 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); #endif #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) /* * SMP Locking Note : * The list of locks after the description of the lock is the ordering * of other locks acquired with the lock held. * np->n_mtx : Protects the fields in the nfsnode. VM Object Lock VI_MTX (acquired indirectly) * nmp->nm_mtx : Protects the fields in the nfsmount. rep->r_mtx * nfs_iod_mtx : Global lock, protects shared nfsiod state. * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. nmp->nm_mtx rep->r_mtx * rep->r_mtx : Protects the fields in an nfsreq. */ static int nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred) { const int v3 = 1; u_int32_t *tl; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb; caddr_t bpos, dpos; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); nfsstats.rpccnt[NFSPROC_ACCESS]++; mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); nfsm_request(vp, NFSPROC_ACCESS, td, cred); nfsm_postop_attr(vp, attrflag); if (!error) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); mtx_lock(&np->n_mtx); np->n_mode = rmode; np->n_modeuid = cred->cr_uid; np->n_modestamp = time_second; mtx_unlock(&np->n_mtx); } m_freem(mrep); nfsmout: return (error); } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int error = 0; u_int32_t mode, wmode; int v3 = NFS_ISV3(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v3) { if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ mtx_lock(&np->n_mtx); if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && (ap->a_cred->cr_uid == np->n_modeuid) && ((np->n_mode & mode) == mode)) { nfsstats.accesscache_hits++; } else { /* * Either a no, or a don't know. Go to the wire. */ nfsstats.accesscache_misses++; mtx_unlock(&np->n_mtx); error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred); mtx_lock(&np->n_mtx); if (!error) { if ((np->n_mode & mode) != mode) { error = EACCES; } } } mtx_unlock(&np->n_mtx); return (error); } else { if ((error = nfsspec_access(ap)) != 0) { return (error); } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = ap->a_td; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } else mtx_unlock(&np->n_mtx); return (error); } } int nfs_otw_getattr_avoid = 0; /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; int error; int fmode = ap->a_mode; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (EOPNOTSUPP); /* * Get a valid lease. If cached data is stale, flush it. */ mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) return (error); np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return (error); mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; mtx_unlock(&np->n_mtx); } else { struct thread *td = curthread; if (np->n_ac_ts_syscalls != td->td_syscalls || np->n_ac_ts_tid != td->td_tid || td->td_proc == NULL || np->n_ac_ts_pid != td->td_proc->p_pid) { np->n_attrstamp = 0; } mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return (error); mtx_lock(&np->n_mtx); if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; } mtx_unlock(&np->n_mtx); } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. */ if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { if (np->n_directio_opens == 0) { error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) return (error); mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; mtx_unlock(&np->n_mtx); } np->n_directio_opens++; } vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. */ /* ARGSUSED */ static int nfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); int error = 0; int fmode = ap->a_fflag; if (vp->v_type == VREG) { /* * Examine and clean dirty pages, regardless of NMODIFIED. * This closes a major hole in close-to-open consistency. * We want to push out all dirty pages (and buffers) on * close, regardless of whether they were dirtied by * mmap'ed writes or via write(). */ if (nfs_clean_pages_on_close && vp->v_object) { VM_OBJECT_LOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_UNLOCK(vp->v_object); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We * must flush them to the NFS server. We have the option * of waiting all the way through the commit rpc or just * waiting for the initial write. The default is to only * wait through the initial write so the data is in the * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). * * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. */ int cm = nfsv3_commit_on_close ? 1 : 0; error = nfs_flush(vp, MNT_WAIT, cm); /* np->n_flag &= ~NMODIFIED; */ } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. * An open is going to fetch fresh attrs any way, other procs * on this node that have file open will be forced to do an * otw attr fetch, but this is safe. */ np->n_attrstamp = 0; if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } mtx_unlock(&np->n_mtx); } if (nfs_directio_enable) KASSERT((np->n_directio_asyncwr == 0), ("nfs_close: dirty unflushed (%d) directio buffers\n", np->n_directio_asyncwr)); if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), ("nfs_close: unexpectedly value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; mtx_unlock(&np->n_mtx); } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct thread *td = curthread; + struct vattr *vap = ap->a_vap; + struct vattr vattr; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; mtx_unlock(&np->n_mtx); /* * First look in the cache. */ - if (nfs_getattrcache(vp, ap->a_vap) == 0) + if (nfs_getattrcache(vp, &vattr) == 0) goto nfsmout; if (v3 && nfsaccess_cache_timeout > 0) { nfsstats.accesscache_misses++; nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred); - if (nfs_getattrcache(vp, ap->a_vap) == 0) + if (nfs_getattrcache(vp, &vattr) == 0) goto nfsmout; } nfsstats.rpccnt[NFSPROC_GETATTR]++; mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred); if (!error) { - nfsm_loadattr(vp, ap->a_vap); + nfsm_loadattr(vp, &vattr); } m_freem(mrep); nfsmout: + vap->va_type = vattr.va_type; + vap->va_mode = vattr.va_mode; + vap->va_nlink = vattr.va_nlink; + vap->va_uid = vattr.va_uid; + vap->va_gid = vattr.va_gid; + vap->va_fsid = vattr.va_fsid; + vap->va_fileid = vattr.va_fileid; + vap->va_size = vattr.va_size; + vap->va_blocksize = vattr.va_blocksize; + vap->va_atime = vattr.va_atime; + vap->va_mtime = vattr.va_mtime; + vap->va_ctime = vattr.va_ctime; + vap->va_gen = vattr.va_gen; + vap->va_flags = vattr.va_flags; + vap->va_rdev = vattr.va_rdev; + vap->va_bytes = vattr.va_bytes; + vap->va_filerev = vattr.va_filerev; + return (error); } /* * nfs setattr call. */ static int nfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr *vap = ap->a_vap; struct thread *td = curthread; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags and marking of atimes are not supported. */ if (vap->va_flags != VNOVAL || (vap->va_vaflags & VA_MARK_ATIME)) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto out; } if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ mtx_lock(&np->n_mtx); tsize = np->n_size; mtx_unlock(&np->n_mtx); error = nfs_meta_setsize(vp, ap->a_cred, td, vap->va_size); mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, td, 1); else error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) { vnode_pager_setsize(vp, tsize); goto out; } } else mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in nfs_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * nfs_meta_setsize() and could modify np->n_size. */ mtx_lock(&np->n_mtx); np->n_vattr.va_size = np->n_size = vap->va_size; mtx_unlock(&np->n_mtx); }; } else { mtx_lock(&np->n_mtx); if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && (error == EINTR || error == EIO)) return error; } else mtx_unlock(&np->n_mtx); } error = nfs_setattrrpc(vp, vap, ap->a_cred); if (error && vap->va_size != VNOVAL) { mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, tsize); mtx_unlock(&np->n_mtx); } out: return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred) { struct nfsv2_sattr *sp; struct nfsnode *np = VTONFS(vp); caddr_t bpos, dpos; u_int32_t *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); if (v3) { nfsm_v3attrbuild(vap, TRUE); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = nfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = nfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = nfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, curthread, cred); if (v3) { np->n_modestamp = 0; nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, NULL); m_freem(mrep); nfsmout: return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; int flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; long len; nfsfh_t *fhp; struct nfsnode *np; int error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct thread *td = cnp->cn_thread; *vpp = NULLVP; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { *vpp = NULLVP; return (error); } if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; newvp = *vpp; if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); if (dvp != newvp) vput(newvp); else vrele(newvp); *vpp = NULLVP; } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); if (error) { if (v3) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); } goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... */ if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, NULL); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); newvp = NFSTOV(np); } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, NULL); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; m_freem(mrep); nfsmout: if (error) { if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VREG: return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); case VDIR: return (EISDIR); default: return (EOPNOTSUPP); } } /* * nfs readlink call */ static int nfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. */ int nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { caddr_t bpos, dpos; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); if (len == NFS_MAXPATHLEN) { struct nfsnode *np = VTONFS(vp); mtx_lock(&np->n_mtx); if (np->n_size && np->n_size < NFS_MAXPATHLEN) len = np->n_size; mtx_unlock(&np->n_mtx); } nfsm_mtouio(uiop, len); } m_freem(mrep); nfsmout: return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { u_int32_t *tl; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); int rsize; #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; mtx_lock(&nmp->nm_mtx); if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { mtx_unlock(&nmp->nm_mtx); return (EFBIG); } rsize = nmp->nm_rsize; mtx_unlock(&nmp->nm_mtx); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > rsize) ? rsize : tsiz; mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else { nfsm_loadattr(vp, NULL); } nfsm_strsiz(retlen, rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) { tsiz = 0; } } else if (retlen < len) { tsiz = 0; } } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit) { u_int32_t *tl; int32_t backup; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; int wsize; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; mtx_lock(&nmp->nm_mtx); if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { mtx_unlock(&nmp->nm_mtx); return (EFBIG); } wsize = nmp->nm_wsize; mtx_unlock(&nmp->nm_mtx); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > wsize) ? wsize : tsiz; mreq = nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); if (v3) { tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED); /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; m_freem(mrep); break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_state |= NFSSTA_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } mtx_unlock(&nmp->nm_mtx); } } else { nfsm_loadattr(vp, NULL); } if (wccflag) { mtx_lock(&(VTONFS(vp))->n_mtx); VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; mtx_unlock(&(VTONFS(vp))->n_mtx); } m_freem(mrep); if (error) break; tsiz -= len; } nfsmout: if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct nfsv2_sattr *sp; u_int32_t *tl; struct vnode *newvp = NULL; struct nfsnode *np = NULL; struct vattr vattr; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb; u_int32_t rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = nfs_xdrneg1; else { return (EOPNOTSUPP); } if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); nfsstats.rpccnt[NFSPROC_MKNOD]++; mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl++ = vtonfsv3_type(vap->va_type); nfsm_v3attrbuild(vap, FALSE); if (vap->va_type == VCHR || vap->va_type == VBLK) { tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(umajor(vap->va_rdev)); *tl = txdr_unsigned(uminor(vap->va_rdev)); } } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = NULL; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(struct vop_mknod_args *ap) { return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsv2_sattr *sp; u_int32_t *tl; struct nfsnode *np = NULL; struct vnode *newvp = NULL; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&V_in_ifaddrhead)) *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_v3attrbuild(vap, FALSE); } } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = NULL; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) { /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. */ if (vap->va_mtime.tv_sec == VNOVAL) vfs_timestamp(&vap->va_mtime); if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; error = nfs_setattrrpc(newvp, vap, cnp->cn_cred); if (error) vput(newvp); } if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vrefcnt(vp) < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1); /* Do the rpc */ if (error != EINTR && error != EIO) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); np->n_attrstamp = 0; return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(struct sillyrename *sp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). */ static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, struct ucred *cred, struct thread *td) { caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, td, cred); if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } /* * nfs file rename call */ static int nfs_rename(struct vop_rename_args *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (fvp == tvp) { nfs_printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) goto out; /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. * * Skip the rename operation if the fsync fails, this can happen * due to the server's volume being full, when we pushed out data * that was written back to our cache earlier. Not checking for * this condition can result in potential (silent) data loss. */ error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); VOP_UNLOCK(fvp, 0); if (!error && tvp) error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); if (error) goto out; /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_thread); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp) { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). */ static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { caddr_t bpos, dpos; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, td, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(fdvp))->n_mtx); VTONFS(fdvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(fdvp))->n_mtx); mtx_lock(&(VTONFS(tdvp))->n_mtx); VTONFS(tdvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(tdvp))->n_mtx); if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb; int v3; if (vp->v_mount != tdvp->v_mount) { return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_LINK]++; mreq = nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(tdvp))->n_mtx); VTONFS(tdvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(tdvp))->n_mtx); if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsv2_sattr *sp; caddr_t bpos, dpos; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb; struct vnode *newvp = NULL; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } /* * Issue the NFS request and get the rpc response. * * Only NFSv3 responses returning an error of 0 actually return * a file handle that can be converted into newvp without having * to do an extra lookup rpc. */ nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred); if (v3) { if (error == 0) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } /* * out code jumps -> here, mrep is also freed. */ m_freem(mrep); nfsmout: /* * If we do not have an error and we could not extract the newvp from * the response due to the request being NFSv2, we have to do a * lookup in order to obtain a newvp to return. */ if (error == 0 && newvp == NULL) { struct nfsnode *np = NULL; error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } if (error) { if (newvp) vput(newvp); } else { *ap->a_vpp = newvp; } mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsv2_sattr *sp; int len; struct nfsnode *np = NULL; struct vnode *newvp = NULL; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb; struct vattr vattr; int v3 = NFS_ISV3(dvp); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; if (error == 0 && newvp == NULL) { error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_thread, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vput(newvp); } else *ap->a_vpp = newvp; return (error); } /* * nfs remove directory call */ static int nfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(dvp); if (dvp == vp) return (EINVAL); nfsstats.rpccnt[NFSPROC_RMDIR]++; mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; int tresid, error = 0; struct vattr vattr; if (vp->v_type != VDIR) return(EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { mtx_lock(&np->n_mtx); if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); nfsstats.direofcache_hits++; goto out; } else mtx_unlock(&np->n_mtx); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { nfsstats.direofcache_misses++; } out: return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; caddr_t cp; nfsuint64 *cookiep; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; nfs_dircookie_unlock(dnp); } else { nfs_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); if (v3) { tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; mtx_lock(&dnp->n_mtx); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; mtx_unlock(&dnp->n_mtx); } else { tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); mtx_lock(&dnp->n_mtx); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; mtx_unlock(&dnp->n_mtx); } else { m_freem(mrep); goto nfsmout; } } tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); } else { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); } else { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) nfs_printf("EEK! readdirrpc resid > 0\n"); nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; nfs_dircookie_unlock(dnp); } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). */ int nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int len, left; struct dirent *dp; u_int32_t *tl; caddr_t cp; struct vnode *newvp; nfsuint64 *cookiep; caddr_t bpos, dpos, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = NULL; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; nfs_dircookie_unlock(dnp); } else { nfs_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, 1); tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; mtx_lock(&dnp->n_mtx); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; mtx_unlock(&dnp->n_mtx); *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); mtx_lock(&dnp->n_mtx); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; mtx_unlock(&dnp->n_mtx); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); /* * Skip loading the attrs for "..". There's a * race between loading the attrs here and * lookups that look for the directory currently * being read (in the parent). We try to acquire * the exclusive lock on ".." here, owning the * lock on the directory being read. Lookup will * hold the lock on ".." and try to acquire the * lock on the directory being read. * * There are other ways of fixing this, one would * be to do a trylock on the ".." vnode and skip * loading the attrs on ".." if it happens to be * locked by another process. But skipping the * attrload on ".." seems the easiest option. */ if (strcmp(dp->d_name, "..") == 0) { doit = 0; /* * We've already skipped over the attrs, * skip over the filehandle. And store d_type * as VDIR. */ tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); dp->d_type = IFTODT(VTTOIF(VDIR)); } if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { error = nfs_nget(vp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE); if (error) doit = 0; else newvp = NFSTOV(np); } } if (doit && bigenough) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, NULL); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; /* Update n_ctime, so subsequent lookup doesn't purge entry */ np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); fhsize = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(fhsize)); } } if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) nfs_printf("EEK! readdirplusrpc resid > 0\n"); nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; nfs_dircookie_unlock(dnp); } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct sillyrename *sp; struct nfsnode *np; int error; short pid; unsigned int lticks; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; sp->s_removeit = nfs_removeit; VREF(dvp); /* * Fudge together a funny name. * Changing the format of the funny name to accomodate more * sillynames per directory. * The name is now changed to .nfs...4, where ticks is * CPU ticks since boot. */ pid = cnp->cn_thread->td_proc->p_pid; lticks = (unsigned int)ticks; for ( ; ; ) { sp->s_namlen = sprintf(sp->s_name, ".nfs.%08x.%04x4.4", lticks, pid); if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, NULL)) break; lticks++; } error = nfs_renameit(dvp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, td, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, NULL); } m_freem(mrep); nfsmout: if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct thread *td) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { mtx_unlock(&nmp->nm_mtx); return (0); } mtx_unlock(&nmp->nm_mtx); nfsstats.rpccnt[NFSPROC_COMMIT]++; mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, 1); tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, td, cred); nfsm_wcc_data(vp, wccflag); if (!error) { tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } m_freem(mrep); nfsmout: return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. */ static int nfs_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; struct ucred *cr; KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); BUF_ASSERT_HELD(bp); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) (void)nfs_doio(ap->a_vp, bp, cr, curthread); return (0); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(struct vop_fsync_args *ap) { return (nfs_flush(ap->a_vp, ap->a_waitfor, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(struct vnode *vp, int waitfor, int commit) { struct nfsnode *np = VTONFS(vp); struct buf *bp; int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; struct bufobj *bo; struct thread *td = curthread; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { /* * Release the vnode interlock to avoid a lock * order reversal. */ BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { nbp = TAILQ_NEXT(bp, b_bobufs); continue; } if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) { BUF_UNLOCK(bp); nbp = TAILQ_NEXT(bp, b_bobufs); continue; } BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. * * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; vfs_busy_pages(bp, 1); BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_bobufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } BO_UNLOCK(bo); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, td); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, td); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; bufdone(bp); } } } /* * Start/do any write(s) that are required. */ loop: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; } if (error == ENOLCK) { error = 0; goto loop; } if (nfs_sigintr(nmp, NULL, td)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= B_ASYNC; bwrite(bp); if (nfs_sigintr(nmp, NULL, td)) { error = EINTR; goto done; } goto loop; } if (passone) { passone = 0; BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { while (bo->bo_numoutput) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); error = nfs_sigintr(nmp, NULL, td); if (error) goto done; if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } BO_LOCK(bo); } } if (bo->bo_dirty.bv_cnt != 0 && commit) { BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, &np->n_mtx, slpflag | (PRIBIO + 1), "nfsfsync", 0); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) { mtx_unlock(&np->n_mtx); error = EINTR; goto done; } } } mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } if (commit && bo->bo_dirty.bv_cnt == 0 && bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; error = vn_lock(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; VOP_UNLOCK(vp, 0); error = lf_advlock(ap, &(vp->v_lockf), size); } else { if (nfs_advlock_p) error = nfs_advlock_p(ap); else error = ENOLCK; } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; error = vn_lock(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; VOP_UNLOCK(vp, 0); error = lf_advlockasync(ap, &(vp->v_lockf), size); } else { VOP_UNLOCK(vp, 0); error = EOPNOTSUPP; } return (error); } /* * Print out the contents of an nfsnode. */ static int nfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); nfs_printf("\tfileid %ld fsid 0x%x", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * We set B_CACHE if this is a VMIO buffer. */ int nfs_writebp(struct buf *bp, int force __unused, struct thread *td) { int s; int oldflags = bp->b_flags; #if 0 int retv = 1; off_t off; #endif BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; bufobj_wref(bp->b_bufobj); curthread->td_ru.ru_oublock++; splx(s); /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if( (oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred); if (error) goto out; error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode, cred, NULL); out: return error; } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); int error; /* * Set access flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NACC; getnanotime(&np->n_atim); mtx_unlock(&np->n_mtx); error = fifo_specops.vop_read(ap); return error; } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NUPD; getnanotime(&np->n_mtim); mtx_unlock(&np->n_mtx); return(fifo_specops.vop_write(ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred); goto out; } } mtx_unlock(&np->n_mtx); out: return (fifo_specops.vop_close(ap)); } /* * Just call nfs_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. */ static int nfs_bwrite(struct buf *bp) { return (nfs_writebp(bp, 1, curthread)); } struct buf_ops buf_ops_nfs = { .bop_name = "buf_ops_nfs", .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, };