Index: head/sys/fs/specfs/spec_vnops.c =================================================================== --- head/sys/fs/specfs/spec_vnops.c (revision 17760) +++ head/sys/fs/specfs/spec_vnops.c (revision 17761) @@ -1,882 +1,887 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94 - * $Id: spec_vnops.c,v 1.29 1996/03/19 05:13:17 dyson Exp $ + * $Id: spec_vnops.c,v 1.30 1996/07/27 03:50:31 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include static int spec_ebadf __P((void)); static int spec_getattr __P((struct vop_getattr_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)vn_default_error }, { &vop_lookup_desc, (vop_t *)spec_lookup }, /* lookup */ { &vop_create_desc, (vop_t *)spec_create }, /* create */ { &vop_mknod_desc, (vop_t *)spec_mknod }, /* mknod */ { &vop_open_desc, (vop_t *)spec_open }, /* open */ { &vop_close_desc, (vop_t *)spec_close }, /* close */ { &vop_access_desc, (vop_t *)spec_access }, /* access */ { &vop_getattr_desc, (vop_t *)spec_getattr }, /* getattr */ { &vop_setattr_desc, (vop_t *)spec_setattr }, /* setattr */ { &vop_read_desc, (vop_t *)spec_read }, /* read */ { &vop_write_desc, (vop_t *)spec_write }, /* write */ { &vop_ioctl_desc, (vop_t *)spec_ioctl }, /* ioctl */ { &vop_select_desc, (vop_t *)spec_select }, /* select */ { &vop_mmap_desc, (vop_t *)spec_mmap }, /* mmap */ { &vop_fsync_desc, (vop_t *)spec_fsync }, /* fsync */ { &vop_seek_desc, (vop_t *)spec_seek }, /* seek */ { &vop_remove_desc, (vop_t *)spec_remove }, /* remove */ { &vop_link_desc, (vop_t *)spec_link }, /* link */ { &vop_rename_desc, (vop_t *)spec_rename }, /* rename */ { &vop_mkdir_desc, (vop_t *)spec_mkdir }, /* mkdir */ { &vop_rmdir_desc, (vop_t *)spec_rmdir }, /* rmdir */ { &vop_symlink_desc, (vop_t *)spec_symlink }, /* symlink */ { &vop_readdir_desc, (vop_t *)spec_readdir }, /* readdir */ { &vop_readlink_desc, (vop_t *)spec_readlink }, /* readlink */ { &vop_abortop_desc, (vop_t *)spec_abortop }, /* abortop */ { &vop_inactive_desc, (vop_t *)spec_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *)spec_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *)spec_lock }, /* lock */ { &vop_unlock_desc, (vop_t *)spec_unlock }, /* unlock */ { &vop_bmap_desc, (vop_t *)spec_bmap }, /* bmap */ { &vop_strategy_desc, (vop_t *)spec_strategy }, /* strategy */ { &vop_print_desc, (vop_t *)spec_print }, /* print */ { &vop_islocked_desc, (vop_t *)spec_islocked }, /* islocked */ { &vop_pathconf_desc, (vop_t *)spec_pathconf }, /* pathconf */ { &vop_advlock_desc, (vop_t *)spec_advlock }, /* advlock */ { &vop_blkatoff_desc, (vop_t *)spec_blkatoff }, /* blkatoff */ { &vop_valloc_desc, (vop_t *)spec_valloc }, /* valloc */ { &vop_vfree_desc, (vop_t *)spec_vfree }, /* vfree */ { &vop_truncate_desc, (vop_t *)spec_truncate }, /* truncate */ { &vop_update_desc, (vop_t *)spec_update }, /* update */ { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */ { &vop_getpages_desc, (vop_t *)spec_getpages}, /* getpages */ { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. 
*/ /* ARGSUSED */ int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; register int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && isdisk(dev, VCHR)) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } VOP_UNLOCK(vp); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); VOP_LOCK(vp); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. */ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); default: break; } return (0); } /* * Vnode op for read */ /* ARGSUSED */ int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on, majordev; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); VOP_LOCK(vp); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; if ((majordev = major(dev)) < nblkdev && (ioctl = bdevsw[majordev]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = bsize >> DEV_BSHIFT; do { bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return 
(error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); VOP_LOCK(vp); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = (bsize >> DEV_BSHIFT) - 1; do { bn = (uio->uio_offset >> DEV_BSHIFT) &~ blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) { /* bawrite(bp); */ cluster_write(bp, 0); } else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) if (bdevsw[major(dev)]->d_flags & B_TAPE) return (0); else return (1); return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ int spec_select(ap) struct vop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { default: return (1); /* XXX */ case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_select)(dev, ap->a_which, ap->a_p); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. 
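 *
 * (The loop below restarts from the top after every bawrite(), because
 * splbio() is dropped while the write is started and the dirty list can
 * change underneath us; with MNT_WAIT we then drain v_numoutput before
 * returning.)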
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } /* * Just call the device strategy routine */ int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); return (0); } /* * This is a noop, simply returning what one has been given. */ int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * At the moment we do not do any locking. */ /* ARGSUSED */ int spec_lock(ap) struct vop_lock_args /* { struct vnode *a_vp; } */ *ap; { return (0); } /* ARGSUSED */ int spec_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { return (0); } /* * Device close routine */ /* ARGSUSED */ int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcably closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = cdevsw[major(dev)]->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ - if ((vcount(vp) > ((vp->v_flag & VVMIO)?2:1)) && + if ((vcount(vp) > (vp->v_object?2:1)) && (vp->v_flag & VXLOCK) == 0) return (0); + + if (vp->v_object) + vnode_pager_uncache(vp); + devclose = bdevsw[major(dev)]->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode. 
*/ int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Return POSIX pathconf information applicable to special devices. */ int spec_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_MAX_CANON: *ap->a_retval = MAX_CANON; return (0); case _PC_MAX_INPUT: *ap->a_retval = MAX_INPUT; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_VDISABLE: *ap->a_retval = _POSIX_VDISABLE; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Special device advisory byte-level locks. */ /* ARGSUSED */ int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (EOPNOTSUPP); } /* * Special device failed operation */ static int spec_ebadf() { return (EBADF); } /* * Special device bad operation */ int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the size of the transfer. */ blkno = (IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset) / DEV_BSIZE; /* XXX sanity check before we go into details */ if (blkno < 0) { printf("spec_getpages: negative blkno (%ld)\n", blkno); return (VM_PAGER_ERROR); } /* * Round up physical size for real devices. */ size = (ap->a_count + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp); if (bp->b_flags & B_ASYNC) return (VM_PAGER_PEND); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "vnread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) error = EIO; if (!error && ap->a_count != pcount * PAGE_SIZE) bzero((caddr_t)kva + ap->a_count, PAGE_SIZE * pcount - ap->a_count); pmap_qremove(kva, pcount); /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); for (i = 0; i < pcount; i++) { ap->a_m[i]->dirty = 0; ap->a_m[i]->valid = VM_PAGE_BITS_ALL; ap->a_m[i]->flags &= ~PG_ZERO; if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that emperical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use. 
*/ if (!error) { vm_page_deactivate(ap->a_m[i]); PAGE_WAKEUP(ap->a_m[i]); } else vnode_pager_freepage(ap->a_m[i]); } } if (error) printf("spec_getpages: I/O read error\n"); return (error ? VM_PAGER_ERROR : VM_PAGER_OK); } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = (u_quad_t) dpart.disklab->d_partitions[minor(vp->v_rdev)].p_size * DEV_BSIZE; vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/kern/vfs_bio.c =================================================================== --- head/sys/kern/vfs_bio.c (revision 17760) +++ head/sys/kern/vfs_bio.c (revision 17761) @@ -1,1770 +1,1785 @@ /* * Copyright (c) 1994 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. This work was done expressly for inclusion into FreeBSD. Other use * is allowed if this notation is included. * 5. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: vfs_bio.c,v 1.94 1996/06/30 05:17:08 davidg Exp $ + * $Id: vfs_bio.c,v 1.95 1996/08/04 20:13:08 phk Exp $ */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. */ #include "opt_bounce.h" #define VMIO #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void vfs_update __P((void)); static struct proc *updateproc; static struct kproc_desc up_kp = { "update", vfs_update, &updateproc }; SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) struct buf *buf; /* buffer header pool */ struct swqueue bswlist; int count_lock_queue __P((void)); static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); int needsbuffer; /* * Internal update daemon, process 3 * The variable vfs_update_wakeup allows for internal syncs. 
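 *
 * A sketch of how an internal sync can be requested without waiting out
 * vfs_update_interval (this mirrors what the sysctl handler further
 * down already does):
 *
 *	wakeup(&vfs_update_wakeup);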
*/ int vfs_update_wakeup; /* * buffers base kva */ caddr_t buffers_kva; /* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer * for input in the case of buffers partially already in memory, * but the code is intricate enough already. */ vm_page_t bogus_page; static vm_offset_t bogus_offset; static int bufspace, maxbufspace, vmiospace, maxvmiobufspace, bufmallocspace, maxbufmallocspace; static struct bufhashhdr bufhashtbl[BUFHSZ], invalhash; static struct bqueues bufqueues[BUFFER_QUEUES]; extern int vm_swap_size; #define BUF_MAXUSE 8 +/* +#define NO_B_MALLOC +*/ /* * Initialize buffer headers and related structures. */ void bufinit() { struct buf *bp; int i; TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); /* first, make a null hash table */ for (i = 0; i < BUFHSZ; i++) LIST_INIT(&bufhashtbl[i]); /* next, make a null set of free lists */ for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); buffers_kva = (caddr_t) kmem_alloc_pageable(buffer_map, MAXBSIZE * nbuf); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; /* we're just an empty header */ bp->b_dev = NODEV; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_vnbufs.le_next = NOLIST; bp->b_data = buffers_kva + i * MAXBSIZE; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_INSERT_HEAD(&invalhash, bp, b_hash); } /* * maxbufspace is currently calculated to support all filesystem blocks * to be 8K. If you happen to use a 16K filesystem, the size of the buffer * cache is still the same as it would be for 8K filesystems. This * keeps the size of the buffer cache "in check" for big block filesystems. */ maxbufspace = 2 * (nbuf + 8) * PAGE_SIZE; /* * reserve 1/3 of the buffers for metadata (VDIR) which might not be VMIO'ed */ maxvmiobufspace = 2 * maxbufspace / 3; /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on average * (small) directories. */ maxbufmallocspace = maxbufspace / 20; bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); bogus_page = vm_page_alloc(kernel_object, ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); } /* * remove the buffer from the appropriate free list */ void bremfree(struct buf * bp) { int s = splbio(); if (bp->b_qindex != QUEUE_NONE) { TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_qindex = QUEUE_NONE; } else { panic("bremfree: removing a buffer when not on a queue"); } splx(s); } /* * Get a buffer with the specified data. Look in the cache first. */ int bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, struct buf ** bpp) { struct buf *bp; bp = getblk(vp, blkno, size, 0, 0); *bpp = bp; /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); return (biowait(bp)); } return (0); } /* * Operates like bread, but also starts asynchronous I/O on * read-ahead blocks. 
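 *
 * A caller-side sketch, mirroring the spec_read() usage in the previous
 * file (variable names are taken from there; error handling abbreviated):
 *
 *	nextbn = bn + bscale;
 *	error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1,
 *	    NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	... use bp->b_data ...
 *	brelse(bp);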
*/ int breadn(struct vnode * vp, daddr_t blkno, int size, daddr_t * rablkno, int *rabsize, int cnt, struct ucred * cred, struct buf ** bpp) { struct buf *bp, *rabp; int i; int rv = 0, readwait = 0; *bpp = bp = getblk(vp, blkno, size, 0, 0); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); ++readwait; } for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; rabp->b_flags |= B_READ | B_ASYNC; rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (rabp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); rabp->b_rcred = cred; } vfs_busy_pages(rabp, 0); VOP_STRATEGY(rabp); } else { brelse(rabp); } } if (readwait) { rv = biowait(bp); } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async.) */ int bwrite(struct buf * bp) { int oldflags = bp->b_flags; if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); bp->b_flags |= B_WRITEINPROG; if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; vfs_busy_pages(bp, 1); if (curproc != NULL) curproc->p_stats->p_ru.ru_oublock++; VOP_STRATEGY(bp); if ((oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { reassignbuf(bp, bp->b_vp); } brelse(bp); return (rtval); } return (0); } int vn_bwrite(ap) struct vop_bwrite_args *ap; { return (bwrite(ap->a_bp)); } /* * Delayed write. (Buffer is marked dirty). */ void bdwrite(struct buf * bp) { if ((bp->b_flags & B_BUSY) == 0) { panic("bdwrite: buffer is not busy"); } if (bp->b_flags & B_INVAL) { brelse(bp); return; } if (bp->b_flags & B_TAPE) { bawrite(bp); return; } bp->b_flags &= ~(B_READ|B_RELBUF); if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= B_DONE | B_DELWRI; reassignbuf(bp, bp->b_vp); } /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other datastructure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also, that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... So, this is important to do. */ if( bp->b_lblkno == bp->b_blkno) { VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } /* * Set the *dirty* buffer range based upon the VM system dirty pages. */ vfs_setdirty(bp); /* * We need to do this here to satisfy the vnode_pager and the * pageout daemon, so that it thinks that the pages have been * "cleaned". Note that since the pages are in a delayed write * buffer -- the VFS layer "will" see that the pages get written * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); bqrelse(bp); return; } /* * Asynchronous write. * Start output on a buffer, but do not wait for it to complete. * The buffer is released when the output completes. */ void bawrite(struct buf * bp) { bp->b_flags |= B_ASYNC; (void) VOP_BWRITE(bp); } /* * Release a buffer. 
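 *
 * Note the split with bqrelse() below: biodone() routes buffers with
 * B_NOCACHE, B_INVAL, B_ERROR or B_RELBUF set through brelse() for the
 * full VMIO rundown, and everything else through the cheaper bqrelse():
 *
 *	if (bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF))
 *		brelse(bp);
 *	else
 *		bqrelse(bp);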
*/ void brelse(struct buf * bp) { int s; if (bp->b_flags & B_CLUSTER) { relpbuf(bp); return; } /* anyone need a "free" block? */ s = splbio(); /* anyone need this block? */ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~(B_WANTED | B_AGE); wakeup(bp); } if (bp->b_flags & B_LOCKED) bp->b_flags &= ~B_ERROR; if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || (bp->b_bufsize <= 0)) { bp->b_flags |= B_INVAL; bp->b_flags &= ~(B_DELWRI | B_CACHE); if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) { if (bp->b_bufsize) allocbuf(bp, 0); brelvp(bp); } } /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, so the B_INVAL flag is used to *invalidate* the buffer, * but the VM object is kept around. The B_NOCACHE flag is used to * invalidate the pages in the VM object. */ if (bp->b_flags & B_VMIO) { vm_ooffset_t foff; vm_object_t obj; int i, resid; vm_page_t m; struct vnode *vp; int iototal = bp->b_bufsize; vp = bp->b_vp; if (!vp) panic("brelse: missing vp"); if (bp->b_npages) { vm_pindex_t poff; obj = (vm_object_t) vp->v_object; if (vp->v_type == VBLK) foff = ((vm_ooffset_t) bp->b_lblkno) << DEV_BSHIFT; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; poff = OFF_TO_IDX(foff); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, poff + i); if (!m) { panic("brelse: page missing\n"); } bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } resid = IDX_TO_OFF(m->pindex+1) - foff; if (resid > iototal) resid = iototal; if (resid > 0) { /* * Don't invalidate the page if the local machine has already * modified it. This is the lesser of two evils, and should * be fixed. */ if (bp->b_flags & (B_NOCACHE | B_ERROR)) { vm_page_test_dirty(m); if (m->dirty == 0) { vm_page_set_invalid(m, (vm_offset_t) foff, resid); if (m->valid == 0) vm_page_protect(m, VM_PROT_NONE); } } if (resid >= PAGE_SIZE) { if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { bp->b_flags |= B_INVAL; } } else { if (!vm_page_is_valid(m, (((vm_offset_t) bp->b_data) & PAGE_MASK), resid)) { bp->b_flags |= B_INVAL; } } } foff += resid; iototal -= resid; } } if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); /* enqueue */ /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_qindex = QUEUE_EMPTY; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; if (needsbuffer) { wakeup(&needsbuffer); needsbuffer=0; } /* buffers with junk contents */ } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) { bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; if (needsbuffer) { wakeup(&needsbuffer); needsbuffer=0; } /* buffers that are locked */ } else if (bp->b_flags & B_LOCKED) { bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else if (bp->b_flags & B_AGE) { bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); if (needsbuffer) { wakeup(&needsbuffer); needsbuffer=0; } /* buffers with valid and quite potentially reuseable contents */ } else { bp->b_qindex = QUEUE_LRU; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); if (needsbuffer) { wakeup(&needsbuffer); needsbuffer=0; } } 
/* unlock */ bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } /* * Release a buffer. */ void bqrelse(struct buf * bp) { int s; s = splbio(); /* anyone need this block? */ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~(B_WANTED | B_AGE); wakeup(bp); } if (bp->b_qindex != QUEUE_NONE) panic("bqrelse: free buffer onto another queue???"); if (bp->b_flags & B_LOCKED) { bp->b_flags &= ~B_ERROR; bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else { bp->b_qindex = QUEUE_LRU; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); if (needsbuffer) { wakeup(&needsbuffer); needsbuffer=0; } } /* unlock */ bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } static void vfs_vmio_release(bp) struct buf *bp; { int i; vm_page_t m; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; if (m->flags & PG_WANTED) { m->flags &= ~PG_WANTED; wakeup(m); } vm_page_unwire(m); if (m->wire_count == 0 && (m->flags & PG_BUSY) == 0) { if (m->valid) { if(m->dirty == 0) vm_page_test_dirty(m); /* * this keeps pressure off of the process memory */ if ((vm_swap_size == 0) || (cnt.v_free_count < cnt.v_free_min)) { if ((m->dirty == 0) && (m->hold_count == 0) && (m->flags & PG_BUSY) == 0 && (m->busy == 0)) vm_page_cache(m); else vm_page_deactivate(m); } } else if ((m->hold_count == 0) && ((m->flags & PG_BUSY) == 0) && (m->busy == 0)) { vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); } } } bufspace -= bp->b_bufsize; vmiospace -= bp->b_bufsize; pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); bp->b_npages = 0; bp->b_bufsize = 0; bp->b_flags &= ~B_VMIO; if (bp->b_vp) brelvp(bp); } /* * Check to see if a block is currently memory resident. */ __inline struct buf * gbincore(struct vnode * vp, daddr_t blkno) { struct buf *bp; struct bufhashhdr *bh; bh = BUFHASH(vp, blkno); bp = bh->lh_first; /* Search hash chain */ while (bp != NULL) { /* hit */ if (bp->b_vp == vp && bp->b_lblkno == blkno && (bp->b_flags & B_INVAL) == 0) { break; } bp = bp->b_hash.le_next; } return (bp); } /* * this routine implements clustered async writes for * clearing out B_DELWRI buffers... This is much better * than the old way of writing only one buffer at a time. */ int vfs_bio_awrite(struct buf * bp) { int i; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int s; int ncl; struct buf *bpa; int nwritten; s = splbio(); /* * right now we support clustered writing only to regular files */ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { int size; int maxcl; size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; for (i = 1; i < maxcl; i++) { if ((bpa = gbincore(vp, lblkno + i)) && ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) && (bpa->b_bufsize == size)) { if ((bpa->b_blkno == bpa->b_lblkno) || (bpa->b_blkno != bp->b_blkno + ((i * size) >> DEV_BSHIFT))) break; } else { break; } } ncl = i; /* * this is a possible cluster write */ if (ncl != 1) { nwritten = cluster_wbuild(vp, size, lblkno, ncl); splx(s); return nwritten; } } bremfree(bp); splx(s); /* * default (old) behavior, writing out only one block */ bp->b_flags |= B_BUSY | B_ASYNC; nwritten = bp->b_bufsize; (void) VOP_BWRITE(bp); return nwritten; } /* * Find a buffer header which is available for use. 
*/ static struct buf * getnewbuf(int slpflag, int slptimeo, int doingvmio) { struct buf *bp; int nbyteswritten = 0; start: if (bufspace >= maxbufspace) goto trytofreespace; /* can we constitute a new buffer? */ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) { if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue, qindex=%d", bp->b_qindex); bp->b_flags |= B_BUSY; bremfree(bp); goto fillbuf; } trytofreespace: /* * We keep the file I/O from hogging metadata I/O * This is desirable because file data is cached in the * VM/Buffer cache even if a buffer is freed. */ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) { if (bp->b_qindex != QUEUE_AGE) panic("getnewbuf: inconsistent AGE queue, qindex=%d", bp->b_qindex); } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) { if (bp->b_qindex != QUEUE_LRU) panic("getnewbuf: inconsistent LRU queue, qindex=%d", bp->b_qindex); } if (!bp) { /* wait for a free buffer of any kind */ needsbuffer = 1; tsleep(&needsbuffer, (PRIBIO + 1) | slpflag, "newbuf", slptimeo); return (0); } /* * We are fairly aggressive about freeing VMIO buffers, but since * the buffering is intact without buffer headers, there is not * much loss. We gain by maintaining non-VMIOed metadata in buffers. */ if ((bp->b_qindex == QUEUE_LRU) && (bp->b_usecount > 0)) { if ((bp->b_flags & B_VMIO) == 0 || (vmiospace < maxvmiobufspace)) { --bp->b_usecount; TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); goto start; } TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } } /* if we are a delayed write, convert to an async write */ if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { nbyteswritten += vfs_bio_awrite(bp); if (!slpflag && !slptimeo) { return (0); } goto start; } if (bp->b_flags & B_WANTED) { bp->b_flags &= ~B_WANTED; wakeup(bp); } bremfree(bp); bp->b_flags |= B_BUSY; if (bp->b_flags & B_VMIO) vfs_vmio_release(bp); if (bp->b_vp) brelvp(bp); fillbuf: /* we are not free, nor do we contain interesting data */ if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); if (bp->b_bufsize) { allocbuf(bp, 0); } bp->b_flags = B_BUSY; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_validoff = bp->b_validend = 0; - bp->b_usecount = 2; + bp->b_usecount = 4; if (bufspace >= maxbufspace + nbyteswritten) { bp->b_flags |= B_INVAL; brelse(bp); goto trytofreespace; } return (bp); } /* * Check to see if a block is currently memory resident. */ struct buf * incore(struct vnode * vp, daddr_t blkno) { struct buf *bp; int s = splbio(); bp = gbincore(vp, blkno); splx(s); return (bp); } /* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data. 
*/ int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc; vm_page_t m; vm_ooffset_t off; if (incore(vp, blkno)) return 1; if (vp->v_mount == NULL) return 0; if ((vp->v_object == NULL) || (vp->v_flag & VVMIO) == 0) return 0; obj = vp->v_object; tinc = PAGE_SIZE; if (tinc > vp->v_mount->mnt_stat.f_iosize) tinc = vp->v_mount->mnt_stat.f_iosize; off = blkno * vp->v_mount->mnt_stat.f_iosize; for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) return 0; if (vm_page_is_valid(m, (vm_offset_t) (toff + off), tinc) == 0) return 0; } return 1; } /* * now we set the dirty range for the buffer -- * for NFS -- if the file is mapped and pages have * been written to, let it know. We want the * entire range of the buffer to be marked dirty if * any of the pages have been written to for consistancy * with the b_validoff, b_validend set in the nfs write * code, and used by the nfs read code. */ static void vfs_setdirty(struct buf *bp) { int i; vm_object_t object; vm_offset_t boffset, offset; /* * We qualify the scan for modified pages on whether the * object has been flushed yet. The OBJ_WRITEABLE flag * is not cleared simply by protecting pages off. */ if ((bp->b_flags & B_VMIO) && ((object = bp->b_pages[0]->object)->flags & (OBJ_WRITEABLE|OBJ_CLEANING))) { /* * test the pages to see if they have been modified directly * by users through the VM system. */ for (i = 0; i < bp->b_npages; i++) vm_page_test_dirty(bp->b_pages[i]); /* * scan forwards for the first page modified */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) { break; } } boffset = (i << PAGE_SHIFT); if (boffset < bp->b_dirtyoff) { bp->b_dirtyoff = boffset; } /* * scan backwards for the last page modified */ for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } boffset = (i + 1); offset = boffset + bp->b_pages[0]->pindex; if (offset >= object->size) boffset = object->size - bp->b_pages[0]->pindex; if (bp->b_dirtyend < (boffset << PAGE_SHIFT)) bp->b_dirtyend = (boffset << PAGE_SHIFT); } } /* * Get a block given a specified block and offset into a file/device. */ struct buf * getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; int s; struct bufhashhdr *bh; s = splbio(); loop: if ((bp = gbincore(vp, blkno))) { if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; if (!tsleep(bp, (PRIBIO + 1) | slpflag, "getblk", slptimeo)) goto loop; splx(s); return (struct buf *) NULL; } bp->b_flags |= B_BUSY | B_CACHE; bremfree(bp); /* * check for size inconsistancies (note that they shouldn't happen * but do when filesystems don't handle the size changes correctly.) * We are conservative on metadata and don't just extend the buffer * but write and re-constitute it. */ if (bp->b_bcount != size) { if (bp->b_flags & B_VMIO) { allocbuf(bp, size); } else { bp->b_flags |= B_NOCACHE; VOP_BWRITE(bp); goto loop; } } if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; splx(s); return (bp); } else { vm_object_t obj; int doingvmio; if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) { doingvmio = 1; } else { doingvmio = 0; } if ((bp = getnewbuf(slpflag, slptimeo, doingvmio)) == 0) { if (slpflag || slptimeo) { splx(s); return NULL; } goto loop; } /* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * Normally the vnode is locked so this isn't a problem. 
* VBLK type I/O requests, however, don't lock the vnode. */ if (!VOP_ISLOCKED(vp) && gbincore(vp, blkno)) { bp->b_flags |= B_INVAL; brelse(bp); goto loop; } /* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); LIST_REMOVE(bp, b_hash); bh = BUFHASH(vp, blkno); LIST_INSERT_HEAD(bh, bp, b_hash); if (doingvmio) { bp->b_flags |= (B_VMIO | B_CACHE); #if defined(VFS_BIO_DEBUG) if (vp->v_type != VREG && vp->v_type != VBLK) printf("getblk: vmioing file type %d???\n", vp->v_type); #endif } else { bp->b_flags &= ~B_VMIO; } splx(s); allocbuf(bp, size); #ifdef PC98 /* * 1024byte/sector support */ #define B_XXX2 0x8000000 if (vp->v_flag & 0x10000) bp->b_flags |= B_XXX2; #endif return (bp); } } /* * Get an empty, disassociated buffer of given size. */ struct buf * geteblk(int size) { struct buf *bp; int s; s = splbio(); while ((bp = getnewbuf(0, 0, 0)) == 0); splx(s); allocbuf(bp, size); bp->b_flags |= B_INVAL; return (bp); } /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistant data situations. Tread lightly!!! * * Modify the length of a buffer's underlying buffer storage without * destroying information (unless, of course the buffer is shrinking). */ int allocbuf(struct buf * bp, int size) { int s; int newbsize, mbsize; int i; if (!(bp->b_flags & B_BUSY)) panic("allocbuf: buffer not busy"); if ((bp->b_flags & B_VMIO) == 0) { caddr_t origbuf; int origbufsize; /* * Just get anonymous memory from the kernel */ mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); +#if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) newbsize = mbsize; else +#endif newbsize = round_page(size); if (newbsize < bp->b_bufsize) { +#if !defined(NO_B_MALLOC) /* * malloced buffers are not shrunk */ if (bp->b_flags & B_MALLOC) { if (newbsize) { bp->b_bcount = size; } else { free(bp->b_data, M_BIOBUF); bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; bp->b_data = (caddr_t) buffers_kva + (bp - buf) * MAXBSIZE; bp->b_bufsize = 0; bp->b_bcount = 0; bp->b_flags &= ~B_MALLOC; } return 1; } +#endif vm_hold_free_pages( bp, (vm_offset_t) bp->b_data + newbsize, (vm_offset_t) bp->b_data + bp->b_bufsize); } else if (newbsize > bp->b_bufsize) { +#if !defined(NO_B_MALLOC) /* * We only use malloced memory on the first allocation. * and revert to page-allocated memory when the buffer grows. */ if ( (bufmallocspace < maxbufmallocspace) && (bp->b_bufsize == 0) && (mbsize <= PAGE_SIZE/2)) { bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); bp->b_bufsize = mbsize; bp->b_bcount = size; bp->b_flags |= B_MALLOC; bufspace += mbsize; bufmallocspace += mbsize; return 1; } +#endif origbuf = NULL; origbufsize = 0; +#if !defined(NO_B_MALLOC) /* * If the buffer is growing on it's other-than-first allocation, * then we revert to the page-allocation scheme. 
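 *
 * (A buffer was malloced in the first place only when bufmallocspace was
 * below maxbufmallocspace, b_bufsize was zero and the request was at most
 * PAGE_SIZE/2 -- e.g. a 512 byte directory block on a 4K page machine.
 * Anything that grows past that is copied into page-backed kva here.)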
*/ if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = (caddr_t) buffers_kva + (bp - buf) * MAXBSIZE; bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; bp->b_bufsize = 0; bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } +#endif vm_hold_load_pages( bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); +#if !defined(NO_B_MALLOC) if (origbuf) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } +#endif } } else { vm_page_t m; int desiredpages; newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (round_page(newbsize) >> PAGE_SHIFT); +#if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); +#endif if (newbsize < bp->b_bufsize) { if (desiredpages < bp->b_npages) { for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it * is the responsibility of vnode_pager_setsize */ m = bp->b_pages[i]; s = splhigh(); while ((m->flags & PG_BUSY) || (m->busy != 0)) { m->flags |= PG_WANTED; tsleep(m, PVM, "biodep", 0); } splx(s); bp->b_pages[i] = NULL; vm_page_unwire(m); } pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } } else if (newbsize > bp->b_bufsize) { vm_object_t obj; vm_offset_t tinc, toff; vm_ooffset_t off; vm_pindex_t objoff; int pageindex, curbpnpages; struct vnode *vp; int bsize; vp = bp->b_vp; if (vp->v_type == VBLK) bsize = DEV_BSIZE; else bsize = vp->v_mount->mnt_stat.f_iosize; if (bp->b_npages < desiredpages) { obj = vp->v_object; tinc = PAGE_SIZE; if (tinc > bsize) tinc = bsize; off = (vm_ooffset_t) bp->b_lblkno * bsize; doretry: curbpnpages = bp->b_npages; bp->b_flags |= B_CACHE; for (toff = 0; toff < newbsize; toff += tinc) { int bytesinpage; pageindex = toff >> PAGE_SHIFT; objoff = OFF_TO_IDX(off + toff); if (pageindex < curbpnpages) { m = bp->b_pages[pageindex]; #ifdef VFS_BIO_DIAG if (m->pindex != objoff) panic("allocbuf: page changed offset??!!!?"); #endif bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; if ((bp->b_flags & B_CACHE) && !vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), bytesinpage)) { bp->b_flags &= ~B_CACHE; } continue; } m = vm_page_lookup(obj, objoff); if (!m) { m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); if (!m) { VM_WAIT; goto doretry; } /* * Normally it is unwise to clear PG_BUSY without * PAGE_WAKEUP -- but it is okay here, as there is * no chance for blocking between here and vm_page_alloc */ m->flags &= ~PG_BUSY; vm_page_wire(m); bp->b_flags &= ~B_CACHE; } else if (m->flags & PG_BUSY) { s = splhigh(); m->flags |= PG_WANTED; tsleep(m, PVM, "pgtblk", 0); splx(s); goto doretry; } else { if ((curproc != pageproc) && (m->queue == PQ_CACHE) && ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_min + cnt.v_cache_min))) { pagedaemon_wakeup(); } bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; if ((bp->b_flags & B_CACHE) && !vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), bytesinpage)) { bp->b_flags &= ~B_CACHE; } vm_page_wire(m); } bp->b_pages[pageindex] = m; curbpnpages = pageindex + 1; } bp->b_data = (caddr_t) trunc_page(bp->b_data); bp->b_npages = curbpnpages; pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); ((vm_offset_t) bp->b_data) |= off & PAGE_MASK; } } } if (bp->b_flags & B_VMIO) vmiospace += bp->b_bufsize; bufspace += (newbsize - bp->b_bufsize); 
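	/*
	 * Global space accounting: vmiospace and bufspace grow here and
	 * are decremented again when a buffer is torn down, in
	 * vfs_vmio_release() and in the B_MALLOC paths above.
	 */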
bp->b_bufsize = newbsize; bp->b_bcount = size; return 1; } /* * Wait for buffer I/O completion, returning error status. */ int biowait(register struct buf * bp) { int s; s = splbio(); while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PRIBIO, "biowait", 0); splx(s); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_flags & B_ERROR) { return (bp->b_error ? bp->b_error : EIO); } else { return (0); } } /* * Finish I/O on a buffer, calling an optional function. * This is usually called from interrupt level, so process blocking * is not *a good idea*. */ void biodone(register struct buf * bp) { int s; s = splbio(); if (!(bp->b_flags & B_BUSY)) panic("biodone: buffer not busy"); if (bp->b_flags & B_DONE) { splx(s); printf("biodone: buffer already done\n"); return; } bp->b_flags |= B_DONE; if ((bp->b_flags & B_READ) == 0) { vwakeup(bp); } #ifdef BOUNCE_BUFFERS if (bp->b_flags & B_BOUNCE) vm_bounce_free(bp); #endif /* call optional completion function if requested */ if (bp->b_flags & B_CALL) { bp->b_flags &= ~B_CALL; (*bp->b_iodone) (bp); splx(s); return; } if (bp->b_flags & B_VMIO) { int i, resid; vm_ooffset_t foff; vm_page_t m; vm_object_t obj; int iosize; struct vnode *vp = bp->b_vp; if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; obj = vp->v_object; if (!obj) { panic("biodone: no object"); } #if defined(VFS_BIO_DEBUG) if (obj->paging_in_progress < bp->b_npages) { printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", obj->paging_in_progress, bp->b_npages); } #endif iosize = bp->b_bufsize; for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; m = bp->b_pages[i]; if (m == bogus_page) { bogusflag = 1; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (!m) { #if defined(VFS_BIO_DEBUG) printf("biodone: page disappeared\n"); #endif --obj->paging_in_progress; continue; } bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } #if defined(VFS_BIO_DEBUG) if (OFF_TO_IDX(foff) != m->pindex) { printf("biodone: foff(%d)/m->pindex(%d) mismatch\n", foff, m->pindex); } #endif resid = IDX_TO_OFF(m->pindex + 1) - foff; if (resid > iosize) resid = iosize; /* * In the write case, the valid and clean bits are * already changed correctly, so we only need to do this * here in the read case. */ if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { vm_page_set_validclean(m, (vm_offset_t) (foff & PAGE_MASK), resid); } /* * when debugging new filesystems or buffer I/O methods, this * is the most common error that pops up. if you see this, you * have not set the page busy flag correctly!!! 
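 *
 * The protocol being checked: the usual producer, vfs_busy_pages()
 * (or the code that built a B_CLUSTER buffer), increments m->busy on
 * every constituent page before VOP_STRATEGY() is called, and the
 * matching decrement happens just below, so a zero count here means the
 * I/O was started on a page that was never busied.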
*/ if (m->busy == 0) { printf("biodone: page busy < 0, " "pindex: %d, foff: 0x(%x,%x), " "resid: %d, index: %d\n", (int) m->pindex, (int)(foff >> 32), (int) foff & 0xffffffff, resid, i); if (vp->v_type != VBLK) printf(" iosize: %ld, lblkno: %d, flags: 0x%lx, npages: %d\n", bp->b_vp->v_mount->mnt_stat.f_iosize, (int) bp->b_lblkno, bp->b_flags, bp->b_npages); else printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n", (int) bp->b_lblkno, bp->b_flags, bp->b_npages); printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n", m->valid, m->dirty, m->wire_count); panic("biodone: page busy < 0\n"); } --m->busy; if ((m->busy == 0) && (m->flags & PG_WANTED)) { m->flags &= ~PG_WANTED; wakeup(m); } --obj->paging_in_progress; foff += resid; iosize -= resid; } if (obj && obj->paging_in_progress == 0 && (obj->flags & OBJ_PIPWNT)) { obj->flags &= ~OBJ_PIPWNT; wakeup(obj); } } /* * For asynchronous completions, release the buffer now. The brelse * checks for B_WANTED and will do the wakeup there if necessary - so * no need to do a wakeup here in the async case. */ if (bp->b_flags & B_ASYNC) { if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0) brelse(bp); else bqrelse(bp); } else { wakeup(bp); } splx(s); } int count_lock_queue() { int count; struct buf *bp; count = 0; for (bp = TAILQ_FIRST(&bufqueues[QUEUE_LOCKED]); bp != NULL; bp = TAILQ_NEXT(bp, b_freelist)) count++; return (count); } int vfs_update_interval = 30; static void vfs_update() { (void) spl0(); /* XXX redundant? wrong place? */ while (1) { tsleep(&vfs_update_wakeup, PUSER, "update", hz * vfs_update_interval); vfs_update_wakeup = 0; sync(curproc, NULL, NULL); } } static int sysctl_kern_updateinterval SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) wakeup(&vfs_update_wakeup); return error; } SYSCTL_PROC(_kern, KERN_UPDATEINTERVAL, update, CTLTYPE_INT|CTLFLAG_RW, &vfs_update_interval, 0, sysctl_kern_updateinterval, "I", ""); /* * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistant. */ void vfs_unbusy_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_object_t obj = vp->v_object; vm_ooffset_t foff; foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, OFF_TO_IDX(foff) + i); if (!m) { panic("vfs_unbusy_pages: page missing\n"); } bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } --obj->paging_in_progress; --m->busy; if ((m->busy == 0) && (m->flags & PG_WANTED)) { m->flags &= ~PG_WANTED; wakeup(m); } } if (obj->paging_in_progress == 0 && (obj->flags & OBJ_PIPWNT)) { obj->flags &= ~OBJ_PIPWNT; wakeup(obj); } } } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer * almost as being PG_BUSY. Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistant. 
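 *
 * The expected call pattern around an I/O, as in bread() and bwrite()
 * above (sketch only; the second argument is 1 for a write that cleans
 * the pages):
 *
 *	vfs_busy_pages(bp, 0);
 *	VOP_STRATEGY(bp);
 *	error = biowait(bp);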
 */
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_object_t obj = bp->b_vp->v_object;
		vm_ooffset_t foff;
		int iocount = bp->b_bufsize;

		if (bp->b_vp->v_type == VBLK)
			foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno;
		else
			foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		vfs_setdirty(bp);
		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = IDX_TO_OFF(m->pindex + 1) - foff;

			if (resid > iocount)
				resid = iocount;
			if ((bp->b_flags & B_CLUSTER) == 0) {
				obj->paging_in_progress++;
				m->busy++;
			}
			vm_page_protect(m, VM_PROT_NONE);
			if (clear_modify) {
				vm_page_set_validclean(m,
				    (vm_offset_t) (foff & PAGE_MASK), resid);
			} else if (bp->b_bcount >= PAGE_SIZE) {
				if (m->valid && (bp->b_flags & B_CACHE) == 0) {
					bp->b_pages[i] = bogus_page;
					pmap_qenter(trunc_page(bp->b_data),
					    bp->b_pages, bp->b_npages);
				}
			}
			foff += resid;
			iocount -= resid;
		}
	}
}

/*
 * Tell the VM system that the pages associated with this buffer
 * are clean.  This is used for delayed writes where the data is
 * going to go to disk eventually without additional VM intervention.
 */
void
vfs_clean_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		vm_ooffset_t foff;
		int iocount = bp->b_bufsize;

		if (bp->b_vp->v_type == VBLK)
			foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno;
		else
			foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno;
		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			int resid = IDX_TO_OFF(m->pindex + 1) - foff;

			if (resid > iocount)
				resid = iocount;
			if (resid > 0) {
				vm_page_set_validclean(m,
				    ((vm_offset_t) foff & PAGE_MASK), resid);
			}
			foff += resid;
			iocount -= resid;
		}
	}
}

void
vfs_bio_clrbuf(struct buf *bp)
{
	int i;

	if( bp->b_flags & B_VMIO) {
		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) {
			int mask;

			mask = 0;
			for(i=0;i<bp->b_bufsize;i+=DEV_BSIZE)
				mask |= (1 << (i/DEV_BSIZE));
			if( bp->b_pages[0]->valid != mask) {
				bzero(bp->b_data, bp->b_bufsize);
			}
			bp->b_pages[0]->valid = mask;
			bp->b_resid = 0;
			return;
		}
		for(i=0;i<bp->b_npages;i++) {
			if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL)
				continue;
			if( bp->b_pages[i]->valid == 0) {
				if ((bp->b_pages[i]->flags & PG_ZERO) == 0) {
					bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE);
				}
			} else {
				int j;
				for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) {
					if( (bp->b_pages[i]->valid & (1<<j)) == 0)
						bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE);
				}
			}
			/* bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; */
		}
		bp->b_resid = 0;
	} else {
		clrbuf(bp);
	}
}

/*
 * vm_hold_load_pages and vm_hold_free_pages get pages into
 * a buffer's address space.  The pages are anonymous and are
 * not associated with a file object.
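 *
 * allocbuf() above uses the pair to grow and shrink a non-VMIO buffer
 * (sketch of the two calls as they appear there):
 *
 *	vm_hold_load_pages(bp, (vm_offset_t) bp->b_data + bp->b_bufsize,
 *	    (vm_offset_t) bp->b_data + newbsize);
 *	...
 *	vm_hold_free_pages(bp, (vm_offset_t) bp->b_data + newbsize,
 *	    (vm_offset_t) bp->b_data + bp->b_bufsize);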
/* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffer's address space. The pages are anonymous and are * not associated with a file object. */ void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; to = round_page(to); from = round_page(from); index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: p = vm_page_alloc(kernel_object, ((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); if (!p) { VM_WAIT; goto tryagain; } vm_page_wire(p); pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); bp->b_pages[index] = p; PAGE_WAKEUP(p); } bp->b_npages = to >> PAGE_SHIFT; } void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; from = round_page(from); to = round_page(to); index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; if (p && (index < bp->b_npages)) { if (p->busy) { printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n", bp->b_blkno, bp->b_lblkno); } bp->b_pages[index] = NULL; pmap_kremove(pg); vm_page_unwire(p); vm_page_free(p); } } bp->b_npages = from >> PAGE_SHIFT; } Index: head/sys/kern/vfs_export.c =================================================================== --- head/sys/kern/vfs_export.c (revision 17760) +++ head/sys/kern/vfs_export.c (revision 17761) @@ -1,1553 +1,1646 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 - * $Id: vfs_subr.c,v 1.57 1996/07/30 18:00:25 bde Exp $ + * $Id: vfs_subr.c,v 1.58 1996/08/15 06:45:01 dyson Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #ifdef DDB extern void printlockedvnodes __P((void)); #endif extern void vclean __P((struct vnode *vp, int flags)); extern void vfs_unmountroot __P((struct mount *rootfs)); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; /* * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) #define bufremvn(bp) { \ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long freevnodes = 0; struct mntlist mountlist; /* mounted filesystem list */ int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RD, &desiredvnodes, 0, ""); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + vm_object_cache_max + extravnodes; TAILQ_INIT(&vnode_free_list); CIRCLEQ_INIT(&mountlist); } /* * Lock a filesystem. * Used to prevent access to it while mounting and unmounting. */ int vfs_lock(mp) register struct mount *mp; { while (mp->mnt_flag & MNT_MLOCK) { mp->mnt_flag |= MNT_MWAIT; (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); } mp->mnt_flag |= MNT_MLOCK; return (0); } /* * Unlock a locked filesystem. * Panic if filesystem is not locked. */ void vfs_unlock(mp) register struct mount *mp; { if ((mp->mnt_flag & MNT_MLOCK) == 0) panic("vfs_unlock: not locked"); mp->mnt_flag &= ~MNT_MLOCK; if (mp->mnt_flag & MNT_MWAIT) { mp->mnt_flag &= ~MNT_MWAIT; wakeup((caddr_t) mp); } } /* * Mark a mount point as busy. * Used to synchronize access and to delay unmounting. */ int vfs_busy(mp) register struct mount *mp; { while (mp->mnt_flag & MNT_MPBUSY) { mp->mnt_flag |= MNT_MPWANT; (void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0); } if (mp->mnt_flag & MNT_UNMOUNT) return (1); mp->mnt_flag |= MNT_MPBUSY; return (0); } /* * Free a busy filesystem. * Panic if filesystem is not busy. 
*/ void vfs_unbusy(mp) register struct mount *mp; { if ((mp->mnt_flag & MNT_MPBUSY) == 0) panic("vfs_unbusy: not busy"); mp->mnt_flag &= ~MNT_MPBUSY; if (mp->mnt_flag & MNT_MPWANT) { mp->mnt_flag &= ~MNT_MPWANT; wakeup((caddr_t) &mp->mnt_flag); } } void vfs_unmountroot(struct mount *rootfs) { struct mount *mp = rootfs; int error; if (vfs_busy(mp)) { printf("failed to unmount root\n"); return; } mp->mnt_flag |= MNT_UNMOUNT; if ((error = vfs_lock(mp))) { printf("lock of root filesystem failed (%d)\n", error); return; } vnode_pager_umount(mp); /* release cached vnodes */ cache_purgevfs(mp); /* remove cache entries for this file sys */ if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) printf("sync of root filesystem failed (%d)\n", error); if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { printf("unmount of root filesystem failed ("); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } mp->mnt_flag &= ~MNT_UNMOUNT; vfs_unbusy(mp); } /* * Unmount all filesystems. Should only be called by halt(). */ void vfs_unmountall() { struct mount *mp, *nmp, *rootfs = NULL; int error; /* unmount all but rootfs */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; if (mp->mnt_flag & MNT_ROOTFS) { rootfs = mp; continue; } error = dounmount(mp, MNT_FORCE, initproc); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* and finally... */ if (rootfs) { vfs_unmountroot(rootfs); } else { printf("no root filesystem\n"); } } /* * Lookup a mount point by filesystem identifier. */ struct mount * getvfs(fsid) fsid_t *fsid; { register struct mount *mp; for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); } return ((struct mount *) 0); } /* * Get a new unique fsid */ void getnewfsid(mp, mtype) struct mount *mp; int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = vap->va_fsid = vap->va_fileid = vap->va_blocksize = vap->va_rdev = vap->va_atime.ts_sec = vap->va_atime.ts_nsec = vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { register struct vnode *vp; retry: vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if * 1. we don't have any free * Pretty obvious, we actually used to panic, but that * is a silly thing to do. * 2. we haven't filled our pool yet * We don't want to trash the incore (VM-)vnodecache. * 3. if less than 1/4th of our vnodes are free.
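 *    (Editorial worked example, not in the original comment: with
 *    numvnodes = 1000 and freevnodes = 200, rule 3 fires because
 *    200 < (1000 >> 2) = 250, so a fresh vnode is malloc'd rather
 *    than recycling the head of the free list.)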
* We don't want to trash the namei cache either. */ if (freevnodes < (numvnodes >> 2) || numvnodes < desiredvnodes || vp == NULL) { vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); numvnodes++; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); if (vp->v_usage > 0) { --vp->v_usage; TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); goto retry; } freevnodes--; if (vp->v_usecount) panic("free vnode isn't"); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; vp->v_lease = NULL; if (vp->v_type != VBAD) vgone(vp); #ifdef DIAGNOSTIC { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_ralen = 0; vp->v_maxra = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_usage = 0; } vp->v_type = VNON; cache_purge(vp); vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; return (0); } /* * Move a vnode from one mount queue to another. */ void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) return; LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); } /* * Update outstanding I/O count and do wakeup if requested. */ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; if (flags & V_SAVE) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } + + s = splbio(); for (;;) { if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; - s = splbio(); if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; error = tsleep((caddr_t) bp, slpflag | (PRIBIO + 1), "vinvalbuf", slptimeo); splx(s); if (error) return (error); break; } bremfree(bp); bp->b_flags |= B_BUSY; - splx(s); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. 
*/ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { (void) VOP_BWRITE(bp); break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); brelse(bp); } } + splx(s); s = splbio(); while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ object = vp->v_object; if (object != NULL) { vm_object_page_remove(object, 0, object->size, (flags & V_SAVE) ? TRUE : FALSE); } if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; if (bp->b_vp) panic("bgetvp: not free"); VHOLD(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bufinsvn(bp, &vp->v_cleanblkhd); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; int s; if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); /* * Delete from old vnode list, if on one. */ s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); splx(s); vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; HOLDRELE(vp); } /* * Associate a p-buffer with a vnode. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { if (bp->b_vp) panic("pbgetvp: not free"); VHOLD(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { struct vnode *vp; if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; HOLDRELE(vp); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { - register struct buflists *listheadp; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; tbp = newvp->v_dirtyblkhd.lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { bufinsvn(bp, &newvp->v_dirtyblkhd); } else { while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { - listheadp = &newvp->v_cleanblkhd; - bufinsvn(bp, listheadp); + bufinsvn(bp, &newvp->v_cleanblkhd); } splx(s); } #ifndef DEVFS_ROOT /* * Create a vnode for a block device. * Used for root filesystem, argdev, and swap areas. * Also used for memory file system special devices. 
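 * (Editorial usage note -- an assumed example, not text from the patch:
 * swap configuration is a typical caller, along the lines of
 * bdevvp(swapdev, &swapdev_vp), yielding the VBLK vnode for the swap
 * device.)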
*/ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) return (0); error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } *vpp = vp; return (0); } #endif /* !DEVFS_ROOT */ /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { register struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vp->v_usecount == 0) { vgone(vp); goto loop; } if (vget(vp, 1)) goto loop; break; } + if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specflags = 0; *vpp = nvp; if (vp != NULL) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } VOP_UNLOCK(vp); vclean(vp, 0); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, lockflag) register struct vnode *vp; int lockflag; { /* * If the vnode is in the process of being cleaned out for another * use, we wait for the cleaning to finish and then return failure. * Cleaning is determined either by checking that the VXLOCK flag is * set, or that the use count is zero with the back pointer set to * show that it has been removed from the free list by getnewvnode. * The VXLOCK flag may not have been set yet because vclean is blocked * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. */ if ((vp->v_flag & VXLOCK) || (vp->v_usecount == 0 && vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t) vp, PINOD, "vget", 0); return (1); } if (vp->v_usecount == 0) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } vp->v_usecount++; + + /* + * Create the VM object, if needed + */ + if ((vp->v_type == VREG) && + ((vp->v_object == NULL) || + (vp->v_object->flags & OBJ_VFS_REF) == 0)) { + vfs_object_create(vp, curproc, curproc->p_ucred, 0); + } if (lockflag) VOP_LOCK(vp); + return (0); } /* * Vnode reference, just increment the count */ void vref(vp) struct vnode *vp; { - if (vp->v_usecount <= 0) panic("vref used where vget required"); + + if ((vp->v_type == VREG) && + ((vp->v_object == NULL) || + ((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) { + /* + * We need to lock to VP during the time that + * the object is created. 
This is necessary to + * keep the system from re-entrantly doing it + * multiple times. + */ + vfs_object_create(vp, curproc, curproc->p_ucred, 0); + } + vp->v_usecount++; } /* * vput(), just unlock and vrele() */ void vput(vp) register struct vnode *vp; { - VOP_UNLOCK(vp); vrele(vp); } /* * Vnode release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(vp) register struct vnode *vp; { #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif + vp->v_usecount--; + + if ((vp->v_usecount == 1) && + vp->v_object && + (vp->v_object->flags & OBJ_VFS_REF)) { + vp->v_object->flags &= ~OBJ_VFS_REF; + vm_object_deallocate(vp->v_object); + return; + } + if (vp->v_usecount > 0) return; - if (vp->v_usecount < 0 /* || vp->v_writecount < 0 */ ) { + + if (vp->v_usecount < 0) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif panic("vrele: negative reference cnt"); } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); vp->v_flag &= ~VAGE; vp->v_usage = 0; } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; VOP_INACTIVE(vp); } #ifdef DIAGNOSTIC /* * Page or buffer structure gets a reference. */ void vhold(vp) register struct vnode *vp; { vp->v_holdcnt++; } /* * Page or buffer structure frees a reference. */ void holdrele(vp) register struct vnode *vp; { if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; } #endif /* DIAGNOSTIC */ /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { register struct vnode *vp, *nvp; int busy = 0; if ((mp->mnt_flag & MNT_MPBUSY) == 0) panic("vflush: not busy"); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) continue; /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) continue; + + if ((vp->v_usecount == 1) && vp->v_object) { + pager_cache(vp->v_object, FALSE); + } + /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { vgone(vp); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { if (vp->v_type != VBLK && vp->v_type != VCHR) { vgone(vp); } else { vclean(vp, 0); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif busy++; } if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ void vclean(struct vnode *vp, int flags) { int active; /* * Check to see if the vnode is in use. 
If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) VREF(vp); /* * Even if the count is zero, the VOP_INACTIVE routine may still have * the object locked while it cleans it out. The VOP_LOCK ensures that * the VOP_INACTIVE routine is done with its work. For active vnodes, * it ensures that no other activity can occur while the underlying * object is being cleaned out. */ VOP_LOCK(vp); /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Clean out any buffers associated with the vnode. */ if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); /* * Any other processes trying to obtain this lock must first wait for * VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp); /* * If purging an active vnode, it must be closed and deactivated * before being reclaimed. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); VOP_INACTIVE(vp); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp)) panic("vclean: cannot reclaim"); if (active) vrele(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ void vgoneall(vp) register struct vnode *vp; { register struct vnode *vq; if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, wait until * it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); return; } /* * Ensure that vp will not be vgone'd while we are eliminating * its aliases. */ vp->v_flag |= VXLOCK; while (vp->v_flag & VALIASED) { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; vgone(vq); break; } } /* * Remove the lock so that vgone below will really eliminate * the vnode after which time vgone will awaken any sleepers. */ vp->v_flag &= ~VXLOCK; } vgone(vp); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { register struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, wait until it is * done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) { LIST_REMOVE(vp, v_mntvnodes); vp->v_mount = NULL; } /* * If special device, remove it from special device alias list. 
*/ if (vp->v_type == VBLK || vp->v_type == VCHR) { if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, move it to * the head of the list. The test of the back pointer and the * reference count of zero is because it will be removed from the free * list by getnewvnode, but will not have its reference count * incremented until after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to close the * previous instance of the underlying object. So, the back pointer is * explicitly set to `0xdeadb' in getnewvnode after removing it from * the freelist to ensure that we do not try to move it here. */ if (vp->v_usecount == 0 && vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } vp->v_type = VBAD; } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; return (1); } return (0); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { register struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { vgone(vq); goto loop; } count += vq->v_usecount; } return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: ", label); printf("type %s, usecount %d, writecount %d, refcount %ld,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. 
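 * (Editorial note, not from the patch: on a kernel built with DDB this
 * can be invoked from the debugger prompt, e.g. "call printlockedvnodes",
 * when chasing a lock-related hang.)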
*/ void printlockedvnodes(void) { register struct mount *mp; register struct vnode *vp; printf("Locked vnodes\n"); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) if (VOP_ISLOCKED(vp)) vprint((char *) 0, vp); } } #endif int kinfo_vdebug = 1; int kinfo_vgetfailed; #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { register struct mount *mp, *nmp; struct vnode *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; if (vfs_busy(mp)) continue; again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { /* * Check that the vp is still associated with this * filesystem. RACE: could have been recycled onto * the same filesystem. */ if (vp->v_mount != mp) { if (kinfo_vdebug) printf("kinfo: vp changed\n"); goto again; } if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) { vfs_unbusy(mp); return (error); } } vfs_unbusy(mp); } return (0); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); /* * Check to see if a filesystem is mounted on a block device. */ int vfs_mountedon(vp) register struct vnode *vp; { register struct vnode *vq; if (vp->v_specflags & SI_MOUNTEDON) return (EBUSY); if (vp->v_flag & VALIASED) { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vq->v_specflags & SI_MOUNTEDON) return (EBUSY); } } return (0); } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. 
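 * (Editorial sketch of the flow, details assumed rather than taken from
 * the patch: an /etc/exports entry such as
 *     /usr -network 10.0.0.0 -mask 255.0.0.0
 * reaches this function as ex_addr/ex_addrlen plus ex_masklen; the
 * address/mask pair is inserted into the per-address-family radix tree,
 * and vfs_export_lookup() later matches an NFS client's sockaddr
 * against that tree.)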
*/ static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep, struct export_args *argp) { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(struct radix_node *rn, void *w) { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. */ static void vfs_free_addrlist(struct netexport *nep) { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct mbuf *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = mtod(nam, struct sockaddr *); rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr) ((caddr_t) saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. 
*/ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT)) continue; if (vp->v_object && - (((vm_object_t) vp->v_object)->flags & OBJ_MIGHTBEDIRTY)) { + (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE); } } +} + +/* + * Create the VM object needed for VMIO and mmap support. This + * is done for all VREG files in the system. Some filesystems might + * afford the additional metadata buffering capability of the + * VMIO code by making the device node be VMIO mode also. + */ +int +vfs_object_create(vp, p, cred, waslocked) + struct vnode *vp; + struct proc *p; + struct ucred *cred; + int waslocked; +{ + struct vattr vat; + vm_object_t object; + int error = 0; + +retry: + if ((object = vp->v_object) == NULL) { + if (vp->v_type == VREG) { + if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) + goto retn; + (void) vnode_pager_alloc(vp, + OFF_TO_IDX(round_page(vat.va_size)), 0, 0); + } else { + /* + * This simply allocates the biggest object possible + * for a VBLK vnode. This should be fixed, but doesn't + * cause any problems (yet). + */ + (void) vnode_pager_alloc(vp, INT_MAX, 0, 0); + } + vp->v_object->flags |= OBJ_VFS_REF; + } else { + if (object->flags & OBJ_DEAD) { + if (waslocked) + VOP_UNLOCK(vp); + tsleep(object, PVM, "vodead", 0); + if (waslocked) + VOP_LOCK(vp); + goto retry; + } + if ((object->flags & OBJ_VFS_REF) == 0) { + object->flags |= OBJ_VFS_REF; + vm_object_reference(object); + } + } + if (vp->v_object) + vp->v_flag |= VVMIO; + +retn: + return error; } Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c (revision 17760) +++ head/sys/kern/vfs_subr.c (revision 17761) @@ -1,1553 +1,1646 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 - * $Id: vfs_subr.c,v 1.57 1996/07/30 18:00:25 bde Exp $ + * $Id: vfs_subr.c,v 1.58 1996/08/15 06:45:01 dyson Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #ifdef DDB extern void printlockedvnodes __P((void)); #endif extern void vclean __P((struct vnode *vp, int flags)); extern void vfs_unmountroot __P((struct mount *rootfs)); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; /* * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) #define bufremvn(bp) { \ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long freevnodes = 0; struct mntlist mountlist; /* mounted filesystem list */ int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RD, &desiredvnodes, 0, ""); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + vm_object_cache_max + extravnodes; TAILQ_INIT(&vnode_free_list); CIRCLEQ_INIT(&mountlist); } /* * Lock a filesystem. * Used to prevent access to it while mounting and unmounting. */ int vfs_lock(mp) register struct mount *mp; { while (mp->mnt_flag & MNT_MLOCK) { mp->mnt_flag |= MNT_MWAIT; (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); } mp->mnt_flag |= MNT_MLOCK; return (0); } /* * Unlock a locked filesystem. * Panic if filesystem is not locked. */ void vfs_unlock(mp) register struct mount *mp; { if ((mp->mnt_flag & MNT_MLOCK) == 0) panic("vfs_unlock: not locked"); mp->mnt_flag &= ~MNT_MLOCK; if (mp->mnt_flag & MNT_MWAIT) { mp->mnt_flag &= ~MNT_MWAIT; wakeup((caddr_t) mp); } } /* * Mark a mount point as busy. * Used to synchronize access and to delay unmounting. */ int vfs_busy(mp) register struct mount *mp; { while (mp->mnt_flag & MNT_MPBUSY) { mp->mnt_flag |= MNT_MPWANT; (void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0); } if (mp->mnt_flag & MNT_UNMOUNT) return (1); mp->mnt_flag |= MNT_MPBUSY; return (0); } /* * Free a busy filesystem. * Panic if filesystem is not busy. 
*/ void vfs_unbusy(mp) register struct mount *mp; { if ((mp->mnt_flag & MNT_MPBUSY) == 0) panic("vfs_unbusy: not busy"); mp->mnt_flag &= ~MNT_MPBUSY; if (mp->mnt_flag & MNT_MPWANT) { mp->mnt_flag &= ~MNT_MPWANT; wakeup((caddr_t) &mp->mnt_flag); } } void vfs_unmountroot(struct mount *rootfs) { struct mount *mp = rootfs; int error; if (vfs_busy(mp)) { printf("failed to unmount root\n"); return; } mp->mnt_flag |= MNT_UNMOUNT; if ((error = vfs_lock(mp))) { printf("lock of root filesystem failed (%d)\n", error); return; } vnode_pager_umount(mp); /* release cached vnodes */ cache_purgevfs(mp); /* remove cache entries for this file sys */ if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) printf("sync of root filesystem failed (%d)\n", error); if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { printf("unmount of root filesystem failed ("); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } mp->mnt_flag &= ~MNT_UNMOUNT; vfs_unbusy(mp); } /* * Unmount all filesystems. Should only be called by halt(). */ void vfs_unmountall() { struct mount *mp, *nmp, *rootfs = NULL; int error; /* unmount all but rootfs */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; if (mp->mnt_flag & MNT_ROOTFS) { rootfs = mp; continue; } error = dounmount(mp, MNT_FORCE, initproc); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } /* and finally... */ if (rootfs) { vfs_unmountroot(rootfs); } else { printf("no root filesystem\n"); } } /* * Lookup a mount point by filesystem identifier. */ struct mount * getvfs(fsid) fsid_t *fsid; { register struct mount *mp; for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) return (mp); } return ((struct mount *) 0); } /* * Get a new unique fsid */ void getnewfsid(mp, mtype) struct mount *mp; int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = vap->va_fsid = vap->va_fileid = vap->va_blocksize = vap->va_rdev = vap->va_atime.ts_sec = vap->va_atime.ts_nsec = vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { register struct vnode *vp; retry: vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if * 1. we don't have any free * Pretty obvious, we actually used to panic, but that * is a silly thing to do. * 2. we haven't filled our pool yet * We don't want to trash the incore (VM-)vnodecache. * 3. if less than 1/4th of our vnodes are free.
* We don't want to trash the namei cache either. */ if (freevnodes < (numvnodes >> 2) || numvnodes < desiredvnodes || vp == NULL) { vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); numvnodes++; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); if (vp->v_usage > 0) { --vp->v_usage; TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); goto retry; } freevnodes--; if (vp->v_usecount) panic("free vnode isn't"); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; vp->v_lease = NULL; if (vp->v_type != VBAD) vgone(vp); #ifdef DIAGNOSTIC { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_ralen = 0; vp->v_maxra = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_usage = 0; } vp->v_type = VNON; cache_purge(vp); vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; return (0); } /* * Move a vnode from one mount queue to another. */ void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) return; LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); } /* * Update outstanding I/O count and do wakeup if requested. */ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; if (flags & V_SAVE) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } + + s = splbio(); for (;;) { if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; - s = splbio(); if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; error = tsleep((caddr_t) bp, slpflag | (PRIBIO + 1), "vinvalbuf", slptimeo); splx(s); if (error) return (error); break; } bremfree(bp); bp->b_flags |= B_BUSY; - splx(s); /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. 
*/ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { (void) VOP_BWRITE(bp); break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); brelse(bp); } } + splx(s); s = splbio(); while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ object = vp->v_object; if (object != NULL) { vm_object_page_remove(object, 0, object->size, (flags & V_SAVE) ? TRUE : FALSE); } if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; if (bp->b_vp) panic("bgetvp: not free"); VHOLD(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bufinsvn(bp, &vp->v_cleanblkhd); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; int s; if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); /* * Delete from old vnode list, if on one. */ s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); splx(s); vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; HOLDRELE(vp); } /* * Associate a p-buffer with a vnode. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { if (bp->b_vp) panic("pbgetvp: not free"); VHOLD(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { struct vnode *vp; if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; HOLDRELE(vp); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { - register struct buflists *listheadp; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; tbp = newvp->v_dirtyblkhd.lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { bufinsvn(bp, &newvp->v_dirtyblkhd); } else { while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { - listheadp = &newvp->v_cleanblkhd; - bufinsvn(bp, listheadp); + bufinsvn(bp, &newvp->v_cleanblkhd); } splx(s); } #ifndef DEVFS_ROOT /* * Create a vnode for a block device. * Used for root filesystem, argdev, and swap areas. * Also used for memory file system special devices. 
*/ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) return (0); error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } *vpp = vp; return (0); } #endif /* !DEVFS_ROOT */ /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { register struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vp->v_usecount == 0) { vgone(vp); goto loop; } if (vget(vp, 1)) goto loop; break; } + if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specflags = 0; *vpp = nvp; if (vp != NULL) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } VOP_UNLOCK(vp); vclean(vp, 0); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, lockflag) register struct vnode *vp; int lockflag; { /* * If the vnode is in the process of being cleaned out for another * use, we wait for the cleaning to finish and then return failure. * Cleaning is determined either by checking that the VXLOCK flag is * set, or that the use count is zero with the back pointer set to * show that it has been removed from the free list by getnewvnode. * The VXLOCK flag may not have been set yet because vclean is blocked * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. */ if ((vp->v_flag & VXLOCK) || (vp->v_usecount == 0 && vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t) vp, PINOD, "vget", 0); return (1); } if (vp->v_usecount == 0) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } vp->v_usecount++; + + /* + * Create the VM object, if needed + */ + if ((vp->v_type == VREG) && + ((vp->v_object == NULL) || + (vp->v_object->flags & OBJ_VFS_REF) == 0)) { + vfs_object_create(vp, curproc, curproc->p_ucred, 0); + } if (lockflag) VOP_LOCK(vp); + return (0); } /* * Vnode reference, just increment the count */ void vref(vp) struct vnode *vp; { - if (vp->v_usecount <= 0) panic("vref used where vget required"); + + if ((vp->v_type == VREG) && + ((vp->v_object == NULL) || + ((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) { + /* + * We need to lock to VP during the time that + * the object is created. 
This is necessary to + * keep the system from re-entrantly doing it + * multiple times. + */ + vfs_object_create(vp, curproc, curproc->p_ucred, 0); + } + vp->v_usecount++; } /* * vput(), just unlock and vrele() */ void vput(vp) register struct vnode *vp; { - VOP_UNLOCK(vp); vrele(vp); } /* * Vnode release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(vp) register struct vnode *vp; { #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif + vp->v_usecount--; + + if ((vp->v_usecount == 1) && + vp->v_object && + (vp->v_object->flags & OBJ_VFS_REF)) { + vp->v_object->flags &= ~OBJ_VFS_REF; + vm_object_deallocate(vp->v_object); + return; + } + if (vp->v_usecount > 0) return; - if (vp->v_usecount < 0 /* || vp->v_writecount < 0 */ ) { + + if (vp->v_usecount < 0) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif panic("vrele: negative reference cnt"); } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); vp->v_flag &= ~VAGE; vp->v_usage = 0; } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; VOP_INACTIVE(vp); } #ifdef DIAGNOSTIC /* * Page or buffer structure gets a reference. */ void vhold(vp) register struct vnode *vp; { vp->v_holdcnt++; } /* * Page or buffer structure frees a reference. */ void holdrele(vp) register struct vnode *vp; { if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; } #endif /* DIAGNOSTIC */ /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { register struct vnode *vp, *nvp; int busy = 0; if ((mp->mnt_flag & MNT_MPBUSY) == 0) panic("vflush: not busy"); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) continue; /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) continue; + + if ((vp->v_usecount == 1) && vp->v_object) { + pager_cache(vp->v_object, FALSE); + } + /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { vgone(vp); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { if (vp->v_type != VBLK && vp->v_type != VCHR) { vgone(vp); } else { vclean(vp, 0); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif busy++; } if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ void vclean(struct vnode *vp, int flags) { int active; /* * Check to see if the vnode is in use. 
If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) VREF(vp); /* * Even if the count is zero, the VOP_INACTIVE routine may still have * the object locked while it cleans it out. The VOP_LOCK ensures that * the VOP_INACTIVE routine is done with its work. For active vnodes, * it ensures that no other activity can occur while the underlying * object is being cleaned out. */ VOP_LOCK(vp); /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Clean out any buffers associated with the vnode. */ if (flags & DOCLOSE) vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); /* * Any other processes trying to obtain this lock must first wait for * VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp); /* * If purging an active vnode, it must be closed and deactivated * before being reclaimed. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); VOP_INACTIVE(vp); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp)) panic("vclean: cannot reclaim"); if (active) vrele(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ void vgoneall(vp) register struct vnode *vp; { register struct vnode *vq; if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, wait until * it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); return; } /* * Ensure that vp will not be vgone'd while we are eliminating * its aliases. */ vp->v_flag |= VXLOCK; while (vp->v_flag & VALIASED) { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; vgone(vq); break; } } /* * Remove the lock so that vgone below will really eliminate * the vnode after which time vgone will awaken any sleepers. */ vp->v_flag &= ~VXLOCK; } vgone(vp); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { register struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, wait until it is * done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) { LIST_REMOVE(vp, v_mntvnodes); vp->v_mount = NULL; } /* * If special device, remove it from special device alias list. 
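* (The alias list is the speclisth[] hash chain, reached through * v_hashchain and linked through v_specnext.)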
*/ if (vp->v_type == VBLK || vp->v_type == VCHR) { if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, move it to * the head of the list. The test of the back pointer and the * reference count of zero is because it will be removed from the free * list by getnewvnode, but will not have its reference count * incremented until after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to close the * previous instance of the underlying object. So, the back pointer is * explicitly set to `0xdeadb' in getnewvnode after removing it from * the freelist to ensure that we do not try to move it here. */ if (vp->v_usecount == 0 && vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } vp->v_type = VBAD; } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; return (1); } return (0); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { register struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { vgone(vq); goto loop; } count += vq->v_usecount; } return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: ", label); printf("type %s, usecount %d, writecount %d, refcount %ld,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. 
*/ void printlockedvnodes(void) { register struct mount *mp; register struct vnode *vp; printf("Locked vnodes\n"); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) if (VOP_ISLOCKED(vp)) vprint((char *) 0, vp); } } #endif int kinfo_vdebug = 1; int kinfo_vgetfailed; #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { register struct mount *mp, *nmp; struct vnode *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; if (vfs_busy(mp)) continue; again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { /* * Check that the vp is still associated with this * filesystem. RACE: could have been recycled onto * the same filesystem. */ if (vp->v_mount != mp) { if (kinfo_vdebug) printf("kinfo: vp changed\n"); goto again; } if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) { vfs_unbusy(mp); return (error); } } vfs_unbusy(mp); } return (0); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); /* * Check to see if a filesystem is mounted on a block device. */ int vfs_mountedon(vp) register struct vnode *vp; { register struct vnode *vq; if (vp->v_specflags & SI_MOUNTEDON) return (EBUSY); if (vp->v_flag & VALIASED) { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vq->v_specflags & SI_MOUNTEDON) return (EBUSY); } } return (0); } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. 
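* One radix tree is kept per address family; a tree is attached on * demand the first time an address of that family is exported.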
*/ static int vfs_hang_addrlist(struct mount *mp, struct netexport *nep, struct export_args *argp) { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_addr, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(struct radix_node *rn, void *w) { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. */ static void vfs_free_addrlist(struct netexport *nep) { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct mbuf *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = mtod(nam, struct sockaddr *); rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr) ((caddr_t) saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. 
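* (The default entry is set up by an export request with ex_addrlen * of zero, which also sets MNT_DEFEXPORTED.)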
*/ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * Perform msync on all vnodes under a mount point. * The mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT)) continue; if (vp->v_object && - (((vm_object_t) vp->v_object)->flags & OBJ_MIGHTBEDIRTY)) { + (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE); } } +} + +/* + * Create the VM object needed for VMIO and mmap support. This + * is done for all VREG files in the system. Some filesystems may + * take advantage of the additional metadata buffering capability of + * the VMIO code by putting the device node into VMIO mode as well. + */ +int +vfs_object_create(vp, p, cred, waslocked) + struct vnode *vp; + struct proc *p; + struct ucred *cred; + int waslocked; +{ + struct vattr vat; + vm_object_t object; + int error = 0; + +retry: + if ((object = vp->v_object) == NULL) { + if (vp->v_type == VREG) { + if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) + goto retn; + (void) vnode_pager_alloc(vp, + OFF_TO_IDX(round_page(vat.va_size)), 0, 0); + } else { + /* + * This simply allocates the biggest object possible + * for a VBLK vnode. This should be fixed, but doesn't + * cause any problems (yet). + */ + (void) vnode_pager_alloc(vp, INT_MAX, 0, 0); + } + vp->v_object->flags |= OBJ_VFS_REF; + } else { + if (object->flags & OBJ_DEAD) { + if (waslocked) + VOP_UNLOCK(vp); + tsleep(object, PVM, "vodead", 0); + if (waslocked) + VOP_LOCK(vp); + goto retry; + } + if ((object->flags & OBJ_VFS_REF) == 0) { + object->flags |= OBJ_VFS_REF; + vm_object_reference(object); + } + } + if (vp->v_object) + vp->v_flag |= VVMIO; + +retn: + return error; } Index: head/sys/kern/vfs_vnops.c =================================================================== --- head/sys/kern/vfs_vnops.c (revision 17760) +++ head/sys/kern/vfs_vnops.c (revision 17761) @@ -1,516 +1,465 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 - * $Id: vfs_vnops.c,v 1.24 1996/03/02 03:45:05 dyson Exp $ + * $Id: vfs_vnops.c,v 1.25 1996/03/09 06:42:15 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int vn_closefile __P((struct file *fp, struct proc *p)); static int vn_ioctl __P((struct file *fp, int com, caddr_t data, struct proc *p)); static int vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int vn_select __P((struct file *fp, int which, struct proc *p)); static int vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); struct fileops vnops = { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile }; /* * Common code for vnode open operations. * Check permissions, and call the VOP_OPEN or VOP_CREATE routine. */ int vn_open(ndp, fmode, cmode) register struct nameidata *ndp; int fmode, cmode; { register struct vnode *vp; register struct proc *p = ndp->ni_cnd.cn_proc; register struct ucred *cred = p->p_ucred; struct vattr vat; struct vattr *vap = &vat; int error; if (fmode & O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; if ((fmode & O_EXCL) == 0) ndp->ni_cnd.cn_flags |= FOLLOW; error = namei(ndp); if (error) return (error); if (ndp->ni_vp == NULL) { VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; LEASE_CHECK(ndp->ni_dvp, p, cred, LEASE_WRITE); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, vap); if (error) return (error); fmode &= ~O_TRUNC; vp = ndp->ni_vp; } else { VOP_ABORTOP(ndp->ni_dvp, &ndp->ni_cnd); if (ndp->ni_dvp == ndp->ni_vp) vrele(ndp->ni_dvp); else vput(ndp->ni_dvp); ndp->ni_dvp = NULL; vp = ndp->ni_vp; if (fmode & O_EXCL) { error = EEXIST; goto bad; } fmode &= ~O_CREAT; } } else { ndp->ni_cnd.cn_nameiop = LOOKUP; ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } if ((fmode & O_CREAT) == 0) { if (fmode & FREAD) { error = VOP_ACCESS(vp, VREAD, cred, p); if (error) goto bad; } if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; error = VOP_ACCESS(vp, VWRITE, cred, p); if (error) goto bad; } } if (fmode & O_TRUNC) { VOP_UNLOCK(vp); /* XXX */ LEASE_CHECK(vp, p, cred, LEASE_WRITE); VOP_LOCK(vp); /* XXX */ VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, cred, p); if (error) goto bad; } error = VOP_OPEN(vp, fmode, cred, p); if (error) goto bad; /* - * this is here for VMIO support + * Make sure that a VM object is created for VMIO support. 
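+ * vfs_object_create() will drop and reacquire the vnode lock if it + * must sleep on a dying (OBJ_DEAD) object, which is why it is told + * here that the vnode is currently locked.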
*/ if (vp->v_type == VREG) { - if ((error = vn_vmio_open(vp, p, cred)) != 0) + if ((error = vfs_object_create(vp, p, cred, 1)) != 0) goto bad; } + if (fmode & FWRITE) vp->v_writecount++; return (0); bad: vput(vp); return (error); } /* * Check for write permissions on the specified vnode. * The read-only status of the file system is checked. * Also, prototype text segments cannot be written. */ int vn_writechk(vp) register struct vnode *vp; { /* * If there's shared text associated with * the vnode, try to free it up once. If * we fail, we can't allow writing. */ if (vp->v_flag & VTEXT) return (ETXTBSY); return (0); } /* * Vnode close call */ int vn_close(vp, flags, cred, p) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; { int error; if (flags & FWRITE) vp->v_writecount--; error = VOP_CLOSE(vp, flags, cred, p); - vn_vmio_close(vp); + vrele(vp); return (error); } /* * Package up an I/O request on a vnode into a uio and do it. */ int vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) enum uio_rw rw; struct vnode *vp; caddr_t base; int len; off_t offset; enum uio_seg segflg; int ioflg; struct ucred *cred; int *aresid; struct proc *p; { struct uio auio; struct iovec aiov; int error; if ((ioflg & IO_NODELOCKED) == 0) VOP_LOCK(vp); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; aiov.iov_len = len; auio.uio_resid = len; auio.uio_offset = offset; auio.uio_segflg = segflg; auio.uio_rw = rw; auio.uio_procp = p; if (rw == UIO_READ) { error = VOP_READ(vp, &auio, ioflg, cred); } else { error = VOP_WRITE(vp, &auio, ioflg, cred); } if (aresid) *aresid = auio.uio_resid; else if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) VOP_UNLOCK(vp); return (error); } /* * File table vnode read routine. */ static int vn_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { register struct vnode *vp = (struct vnode *)fp->f_data; int count, error; LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_READ); VOP_LOCK(vp); uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred); fp->f_offset += count - uio->uio_resid; VOP_UNLOCK(vp); return (error); } /* * File table vnode write routine. */ static int vn_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { register struct vnode *vp = (struct vnode *)fp->f_data; int count, error, ioflag = 0; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_WRITE); VOP_LOCK(vp); uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_WRITE(vp, uio, ioflag, cred); if (ioflag & IO_APPEND) fp->f_offset = uio->uio_offset; else fp->f_offset += count - uio->uio_resid; VOP_UNLOCK(vp); return (error); } /* * File table vnode stat routine. 
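* It fetches the attributes with VOP_GETATTR() and translates them, * field by field, into the stat structure.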
*/ int vn_stat(vp, sb, p) struct vnode *vp; register struct stat *sb; struct proc *p; { struct vattr vattr; register struct vattr *vap; int error; u_short mode; vap = &vattr; error = VOP_GETATTR(vp, vap, p->p_ucred, p); if (error) return (error); /* * Copy from vattr table */ sb->st_dev = vap->va_fsid; sb->st_ino = vap->va_fileid; mode = vap->va_mode; switch (vp->v_type) { case VREG: mode |= S_IFREG; break; case VDIR: mode |= S_IFDIR; break; case VBLK: mode |= S_IFBLK; break; case VCHR: mode |= S_IFCHR; break; case VLNK: mode |= S_IFLNK; break; case VSOCK: mode |= S_IFSOCK; break; case VFIFO: mode |= S_IFIFO; break; default: return (EBADF); } sb->st_mode = mode; sb->st_nlink = vap->va_nlink; sb->st_uid = vap->va_uid; sb->st_gid = vap->va_gid; sb->st_rdev = vap->va_rdev; sb->st_size = vap->va_size; sb->st_atimespec = vap->va_atime; sb->st_mtimespec = vap->va_mtime; sb->st_ctimespec = vap->va_ctime; sb->st_blksize = vap->va_blocksize; sb->st_flags = vap->va_flags; sb->st_gen = vap->va_gen; #if (S_BLKSIZE == 512) /* Optimize this case */ sb->st_blocks = vap->va_bytes >> 9; #else sb->st_blocks = vap->va_bytes / S_BLKSIZE; #endif return (0); } /* * File table vnode ioctl routine. */ static int vn_ioctl(fp, com, data, p) struct file *fp; int com; caddr_t data; struct proc *p; { register struct vnode *vp = ((struct vnode *)fp->f_data); struct vattr vattr; int error; switch (vp->v_type) { case VREG: case VDIR: if (com == FIONREAD) { error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); if (error) return (error); *(int *)data = vattr.va_size - fp->f_offset; return (0); } if (com == FIONBIO || com == FIOASYNC) /* XXX */ return (0); /* XXX */ /* fall into ... */ default: return (ENOTTY); case VFIFO: case VCHR: case VBLK: error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p); if (error == 0 && com == TIOCSCTTY) { /* Do nothing if reassigning same control tty */ if (p->p_session->s_ttyvp == vp) return (0); /* Get rid of reference to old control tty */ if (p->p_session->s_ttyvp) vrele(p->p_session->s_ttyvp); p->p_session->s_ttyvp = vp; VREF(vp); } return (error); } } /* * File table vnode select routine. */ static int vn_select(fp, which, p) struct file *fp; int which; struct proc *p; { return (VOP_SELECT(((struct vnode *)fp->f_data), which, fp->f_flag, fp->f_cred, p)); } /* * File table vnode close routine. */ static int vn_closefile(fp, p) struct file *fp; struct proc *p; { return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, fp->f_cred, p)); -} - -int -vn_vmio_open(vp, p, cred) - struct vnode *vp; - struct proc *p; - struct ucred *cred; -{ - struct vattr vat; - int error; - /* - * this is here for VMIO support - */ - if (vp->v_type == VREG || vp->v_type == VBLK) { -retry: - if ((vp->v_flag & VVMIO) == 0) { - if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) - return error; - (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - vp->v_flag |= VVMIO; - } else { - vm_object_t object; - if ((object = vp->v_object) && - (object->flags & OBJ_DEAD)) { - VOP_UNLOCK(vp); - tsleep(object, PVM, "vodead", 0); - VOP_LOCK(vp); - goto retry; - } - if (!object) - panic("vn_open: VMIO object missing"); - vm_object_reference(object); - } - } - return 0; -} - -void -vn_vmio_close(vp) - struct vnode *vp; -{ - /* - * this code is here for VMIO support, will eventually - * be in vfs code.
- */ - if (vp->v_flag & VVMIO) { - vrele(vp); - if (vp->v_object == NULL) - panic("vn_close: VMIO object missing"); - vm_object_deallocate(vp->v_object); - } else - vrele(vp); } Index: head/sys/miscfs/specfs/spec_vnops.c =================================================================== --- head/sys/miscfs/specfs/spec_vnops.c (revision 17760) +++ head/sys/miscfs/specfs/spec_vnops.c (revision 17761) @@ -1,882 +1,887 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94 - * $Id: spec_vnops.c,v 1.29 1996/03/19 05:13:17 dyson Exp $ + * $Id: spec_vnops.c,v 1.30 1996/07/27 03:50:31 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include static int spec_ebadf __P((void)); static int spec_getattr __P((struct vop_getattr_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)vn_default_error }, { &vop_lookup_desc, (vop_t *)spec_lookup }, /* lookup */ { &vop_create_desc, (vop_t *)spec_create }, /* create */ { &vop_mknod_desc, (vop_t *)spec_mknod }, /* mknod */ { &vop_open_desc, (vop_t *)spec_open }, /* open */ { &vop_close_desc, (vop_t *)spec_close }, /* close */ { &vop_access_desc, (vop_t *)spec_access }, /* access */ { &vop_getattr_desc, (vop_t *)spec_getattr }, /* getattr */ { &vop_setattr_desc, (vop_t *)spec_setattr }, /* setattr */ { &vop_read_desc, (vop_t *)spec_read }, /* read */ { &vop_write_desc, (vop_t *)spec_write }, /* write */ { &vop_ioctl_desc, (vop_t *)spec_ioctl }, /* ioctl */ { &vop_select_desc, (vop_t *)spec_select }, /* select */ { &vop_mmap_desc, (vop_t *)spec_mmap }, /* mmap */ { &vop_fsync_desc, (vop_t *)spec_fsync }, /* fsync */ { &vop_seek_desc, (vop_t *)spec_seek }, /* seek */ { &vop_remove_desc, (vop_t *)spec_remove }, /* remove */ { &vop_link_desc, (vop_t *)spec_link }, /* link */ { &vop_rename_desc, (vop_t *)spec_rename }, /* rename */ { &vop_mkdir_desc, (vop_t *)spec_mkdir }, /* mkdir */ { &vop_rmdir_desc, (vop_t *)spec_rmdir }, /* rmdir */ { &vop_symlink_desc, (vop_t *)spec_symlink }, /* symlink */ { &vop_readdir_desc, (vop_t *)spec_readdir }, /* readdir */ { &vop_readlink_desc, (vop_t *)spec_readlink }, /* readlink */ { &vop_abortop_desc, (vop_t *)spec_abortop }, /* abortop */ { &vop_inactive_desc, (vop_t *)spec_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *)spec_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *)spec_lock }, /* lock */ { &vop_unlock_desc, (vop_t *)spec_unlock }, /* unlock */ { &vop_bmap_desc, (vop_t *)spec_bmap }, /* bmap */ { &vop_strategy_desc, (vop_t *)spec_strategy }, /* strategy */ { &vop_print_desc, (vop_t *)spec_print }, /* print */ { &vop_islocked_desc, (vop_t *)spec_islocked }, /* islocked */ { &vop_pathconf_desc, (vop_t *)spec_pathconf }, /* pathconf */ { &vop_advlock_desc, (vop_t *)spec_advlock }, /* advlock */ { &vop_blkatoff_desc, (vop_t *)spec_blkatoff }, /* blkatoff */ { &vop_valloc_desc, (vop_t *)spec_valloc }, /* valloc */ { &vop_vfree_desc, (vop_t *)spec_vfree }, /* vfree */ { &vop_truncate_desc, (vop_t *)spec_truncate }, /* truncate */ { &vop_update_desc, (vop_t *)spec_update }, /* update */ { &vop_bwrite_desc, (vop_t *)vn_bwrite }, /* bwrite */ { &vop_getpages_desc, (vop_t *)spec_getpages}, /* getpages */ { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. 
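* Character and block devices are dispatched through cdevsw[] and * bdevsw[] respectively; the securelevel checks below write-protect * all disk devices at securelevel 2, and the kernel memory devices * and the character aliases of mounted disks at securelevel 1.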
*/ /* ARGSUSED */ int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; register int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && isdisk(dev, VCHR)) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } VOP_UNLOCK(vp); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p); VOP_LOCK(vp); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && isdisk(dev, VBLK)) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. */ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p)); default: break; } return (0); } /* * Vnode op for read */ /* ARGSUSED */ int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on, majordev; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); VOP_LOCK(vp); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; if ((majordev = major(dev)) < nblkdev && (ioctl = bdevsw[majordev]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = bsize >> DEV_BSHIFT; do { bn = (uio->uio_offset >> DEV_BSHIFT) &~ (bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return 
(error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); VOP_LOCK(vp); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = (bsize >> DEV_BSHIFT) - 1; do { bn = (uio->uio_offset >> DEV_BSHIFT) &~ blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) { /* bawrite(bp); */ cluster_write(bp, 0); } else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) if (bdevsw[major(dev)]->d_flags & B_TAPE) return (0); else return (1); return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ int spec_select(ap) struct vop_select_args /* { struct vnode *a_vp; int a_which; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { default: return (1); /* XXX */ case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_select)(dev, ap->a_which, ap->a_p); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. 
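* The scan is restarted from the head of the list after each write, * since the list may change while bawrite() sleeps.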
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } /* * Just call the device strategy routine */ int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); return (0); } /* * This is a noop, simply returning what one has been given. */ int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * At the moment we do not do any locking. */ /* ARGSUSED */ int spec_lock(ap) struct vop_lock_args /* { struct vnode *a_vp; } */ *ap; { return (0); } /* ARGSUSED */ int spec_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { return (0); } /* * Device close routine */ /* ARGSUSED */ int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcibly closing the device; otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = cdevsw[major(dev)]->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ - if ((vcount(vp) > ((vp->v_flag & VVMIO)?2:1)) && + if ((vcount(vp) > (vp->v_object?2:1)) && (vp->v_flag & VXLOCK) == 0) return (0); + + if (vp->v_object) + vnode_pager_uncache(vp); + devclose = bdevsw[major(dev)]->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode.
*/ int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Return POSIX pathconf information applicable to special devices. */ int spec_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_MAX_CANON: *ap->a_retval = MAX_CANON; return (0); case _PC_MAX_INPUT: *ap->a_retval = MAX_INPUT; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_VDISABLE: *ap->a_retval = _POSIX_VDISABLE; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Special device advisory byte-level locks. */ /* ARGSUSED */ int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (EOPNOTSUPP); } /* * Special device failed operation */ static int spec_ebadf() { return (EBADF); } /* * Special device bad operation */ int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the size of the transfer. */ blkno = (IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset) / DEV_BSIZE; /* XXX sanity check before we go into details */ if (blkno < 0) { printf("spec_getpages: negative blkno (%ld)\n", blkno); return (VM_PAGER_ERROR); } /* * Round up physical size for real devices. */ size = (ap->a_count + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp); if (bp->b_flags & B_ASYNC) return (VM_PAGER_PEND); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "vnread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) error = EIO; if (!error && ap->a_count != pcount * PAGE_SIZE) bzero((caddr_t)kva + ap->a_count, PAGE_SIZE * pcount - ap->a_count); pmap_qremove(kva, pcount); /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); for (i = 0; i < pcount; i++) { ap->a_m[i]->dirty = 0; ap->a_m[i]->valid = VM_PAGE_BITS_ALL; ap->a_m[i]->flags &= ~PG_ZERO; if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that empirical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use.
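* (PAGE_WAKEUP() clears PG_BUSY and wakes up anyone sleeping on the * page.)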
*/ if (!error) { vm_page_deactivate(ap->a_m[i]); PAGE_WAKEUP(ap->a_m[i]); } else vnode_pager_freepage(ap->a_m[i]); } } if (error) printf("spec_getpages: I/O read error\n"); return (error ? VM_PAGER_ERROR : VM_PAGER_OK); } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = (u_quad_t) dpart.disklab->d_partitions[minor(vp->v_rdev)].p_size * DEV_BSIZE; vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/nfs/nfs.h =================================================================== --- head/sys/nfs/nfs.h (revision 17760) +++ head/sys/nfs/nfs.h (revision 17761) @@ -1,603 +1,601 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.16 1995/12/17 21:12:05 phk Exp $ + * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ #include /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */ #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFS_GATHERDELAY #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ #endif #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ /* * Oddballs */ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) /* * XXX * sys/buf.h should be edited to change B_APPENDWRITE --> B_NEEDCOMMIT, but * until then... * Same goes for sys/malloc.h, which needs M_NFSDIROFF, * M_NFSRVDESC and M_NFSBIGFH added. * The VA_EXCLUSIVE flag should be added for va_vaflags and set for an * exclusive create. * The B_INVAFTERWRITE flag should be set to whatever is required by the * buffer cache code to say "Invalidate the block after it is written back". */ #ifndef B_NEEDCOMMIT #define B_NEEDCOMMIT B_APPENDWRITE #endif #ifndef M_NFSRVDESC #define M_NFSRVDESC M_TEMP #endif #ifndef M_NFSDIROFF #define M_NFSDIROFF M_TEMP #endif #ifndef M_NFSBIGFH #define M_NFSBIGFH M_TEMP #endif #ifndef VA_EXCLUSIVE #define VA_EXCLUSIVE 0 #endif #ifdef __FreeBSD__ #define B_INVAFTERWRITE B_NOCACHE #else #define B_INVAFTERWRITE B_INVAL #endif /* * These ifdefs try to handle the differences between the various 4.4BSD-Lite * based vfs interfaces. * btw: NetBSD-current does have a VOP_LEASE(), but I don't know how to * differentiate between NetBSD-1.0 and NetBSD-current, so.. * I also don't know about BSDi's 2.0 release. */ #if !defined(HAS_VOPLEASE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPLEASE 1 #endif #if !defined(HAS_VOPREVOKE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPREVOKE 1 #endif /* * The IO_METASYNC flag should be implemented for local file systems. * (Until then, it is nothing at all.)
*/ #ifndef IO_METASYNC #define IO_METASYNC 0 #endif /* * Set the attribute timeout based on how recently the file has been modified. */ #define NFS_ATTRTIMEO(np) \ ((((np)->n_flag & NMODIFIED) || \ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ (time.tv_sec - (np)->n_mtime) / 10)) /* * Expected allocation sizes for major data structures. If the actual size * of the structure exceeds these sizes, then malloc() will be allocating * almost twice the memory required. This is used in nfs_init() to warn * the sysadmin that the size of a structure should be reduced. * (These sizes are always a power of 2. If the kernel malloc() changes * to one that does not allocate space in powers of 2 size, then this all * becomes bunk!) */ #define NFS_NODEALLOC 256 #define NFS_MNTALLOC 512 #define NFS_SVCALLOC 256 #define NFS_UIDALLOC 128 /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try to use them. */ struct nfsd_args { int sock; /* Socket to serve */ caddr_t name; /* Client address for connection based sockets */ int namelen; /* Length of name */ }; struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ u_long nsd_haddr; /* IP address of client */ struct ucred nsd_cr; /* Cred. uid maps to */ int nsd_authlen; /* Length of auth string (ret) */ u_char *nsd_authstr; /* Auth string (ret) */ int nsd_verflen; /* and the verifier */ u_char *nsd_verfstr; struct timeval nsd_timestamp; /* timestamp from verifier */ u_long nsd_ttl; /* credential ttl (sec) */ NFSKERBKEY_T nsd_key; /* Session key */ }; struct nfsd_cargs { char *ncd_dirp; /* Mount dir path */ uid_t ncd_authuid; /* Effective uid */ int ncd_authtype; /* Type of authenticator */ int ncd_authlen; /* Length of authenticator string */ u_char *ncd_authstr; /* Authenticator string */ int ncd_verflen; /* and the verifier */ u_char *ncd_verfstr; NFSKERBKEY_T ncd_key; /* Session key */ }; /* * Stats structure */ struct nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFS_NPROCS]; int rpcretries; int srvrpccnt[NFS_NPROCS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvnqnfs_leases; int srvnqnfs_maxleases; int srvnqnfs_getleases; int srvvop_writes; }; /* * Flags for nfssvc() system call. */ #define NFSSVC_BIOD 0x002 #define NFSSVC_NFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_AUTHIN 0x010 #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals, the set should * be minimal.
My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(KERNEL) || defined(_KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) /* * Socket errors ignored for connectionless sockets?? * For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ int r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ struct proc *r_procp; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_INTR 0x08 /* intr mnt, signal pending */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ /* * A list of nfssvc_sock structures is maintained with all the sockets * that require service by the nfsd. * The nfsuid structs hang off of the nfssvc_sock structs in both lru * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) #ifndef NFS_WDELAYHASHSIZ #define NFS_WDELAYHASHSIZ 16 /* and with this */ #endif #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ #define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) #define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) /* * Network address hash list element */ union nethostaddr { u_long had_inetaddr; struct mbuf *had_nam; }; struct nfsuid { TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ struct ucred nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. timestamp */ u_long nu_nickname; /* Nickname on server */ NFSKERBKEY_T nu_key; /* and session key */ }; #define nu_inetaddr nu_haddr.had_inetaddr #define nu_nam nu_haddr.had_nam /* Bits for nu_flag */ #define NU_INETADDR 0x1 #define NU_NAM 0x2 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ TAILQ_HEAD(, nfsuid) ns_uidlruhead; struct file *ns_fp; struct socket *ns_so; struct mbuf *ns_nam; struct mbuf *ns_raw; struct mbuf *ns_rawend; struct mbuf *ns_rec; struct mbuf *ns_recend; struct mbuf *ns_frag; int ns_flag; int ns_solock; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; }; /* Bits for "ns_flag" */ #define SLP_VALID 0x01 #define SLP_DOREC 0x02 #define SLP_NEEDQ 0x04 #define SLP_DISCONN 0x08 #define SLP_GETSTREAM 0x10 #define SLP_LASTFRAG 0x20 #define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; extern int nfssvc_sockhead_flag; #define SLP_INIT 0x01 #define SLP_WANTINIT 0x02 /* * One of these structures is allocated for each nfsd. */ struct nfsd { TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ int nfsd_flag; /* NFSD_ flags */ struct nfssvc_sock *nfsd_slp; /* Current socket */ int nfsd_authlen; /* Authenticator len */ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; struct proc *nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; /* Bits for "nfsd_flag" */ #define NFSD_WAITING 0x01 #define NFSD_REQINPROG 0x02 #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. */ struct nfsrv_descript { u_quad_t nd_time; /* Write deadline (usec) */ off_t nd_off; /* Start byte offset */ off_t nd_eoff; /* and end byte offset */ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_nam; /* and socket addr */ struct mbuf *nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ int nd_procnum; /* RPC # */ int nd_stable; /* storage type */ int nd_flag; /* nd_flag */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ #define ND_READ LEASE_READ #define ND_WRITE LEASE_WRITE #define ND_CHECK 0x04 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 #define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; extern int nfsd_head_flag; #define NFSD_CHECKSLP 0x01 /* * These macros compare nfsrv_descript structures. 
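 * They implement the server's write-gathering test: a newly arrived
 * write request (n) may be coalesced into a pending one (o) only when
 * the two byte ranges abut or overlap on the same file handle and both
 * carry the same credentials.  A hedged sketch of a caller, with
 * illustrative variable names (the real loop lives in the write
 * gathering code, declared below as nfsrv_writegather()):
 *
 *	struct nfsrv_descript *owp, *nd;
 *
 *	if (NFSW_CONTIG(owp, nd) && NFSW_SAMECRED(owp, nd)) {
 *		LIST_INSERT_HEAD(&owp->nd_coalesce, nd, nd_tq);
 *		if (nd->nd_eoff > owp->nd_eoff)
 *			owp->nd_eoff = nd->nd_eoff;
 *	}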
*/ #define NFSW_CONTIG(o, n) \ ((o)->nd_eoff >= (n)->nd_off && \ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ sizeof (struct ucred))) int nfs_reply __P((struct nfsreq *)); int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); int nfs_send __P((struct socket *,struct mbuf *,struct mbuf *,struct nfsreq *)); int nfs_rephead __P((int,struct nfsrv_descript *,struct nfssvc_sock *,int,int,u_quad_t *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_sndlock __P((int *,struct nfsreq *)); void nfs_sndunlock __P((int *flagp)); int nfs_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); int nfs_vinvalbuf __P((struct vnode *,int,struct ucred *,struct proc *,int)); int nfs_readrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_writerpc __P((struct vnode *,struct uio *,struct ucred *,int *,int *)); int nfs_readdirrpc __P((register struct vnode *,struct uio *,struct ucred *)); int nfs_asyncio __P((struct buf *,struct ucred *)); int nfs_doio __P((struct buf *,struct ucred *,struct proc *)); int nfs_readlinkrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_sigintr __P((struct nfsmount *,struct nfsreq *r,struct proc *)); int nfs_readdirplusrpc __P((struct vnode *,register struct uio *,struct ucred *)); int nfsm_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); void nfsm_srvfattr __P((struct nfsrv_descript *,struct vattr *,struct nfs_fattr *)); void nfsm_srvwcc __P((struct nfsrv_descript *,int,struct vattr *,int,struct vattr *,struct mbuf **,char **)); void nfsm_srvpostopattr __P((struct nfsrv_descript *,int,struct vattr *,struct mbuf **,char **)); int netaddr_match __P((int,union nethostaddr *,struct mbuf *)); int nfs_request __P((struct vnode *,struct mbuf *,int,struct proc *,struct ucred *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_loadattrcache __P((struct vnode **,struct mbuf **,caddr_t *,struct vattr *)); int nfs_namei __P((struct nameidata *,fhandle_t *,int,struct nfssvc_sock *,struct mbuf *,struct mbuf **,caddr_t *,struct vnode **,struct proc *,int)); void nfsm_adj __P((struct mbuf *,int,int)); int nfsm_mbuftouio __P((struct mbuf **,struct uio *,int,caddr_t *)); void nfsrv_initcache __P((void)); int nfs_getauth __P((struct nfsmount *,struct nfsreq *,struct ucred *,char **,int *,char *,int *,NFSKERBKEY_T)); int nfs_getnickauth __P((struct nfsmount *,struct ucred *,char **,int *,char *,int)); int nfs_savenickauth __P((struct nfsmount *,struct ucred *,int,NFSKERBKEY_T,struct mbuf **,char **,struct mbuf *)); int nfs_adv __P((struct mbuf **,caddr_t *,int,int)); void nfs_nhinit __P((void)); void nfs_timer __P((void*)); u_long nfs_hash __P((nfsfh_t *,int)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); int nfsrv_dorec __P((struct nfssvc_sock *,struct nfsd *,struct nfsrv_descript **)); void nfsrv_cleancache __P((void)); int nfsrv_getcache __P((struct nfsrv_descript *,struct nfssvc_sock *,struct mbuf **)); int nfs_init __P((void)); void nfsrv_updatecache __P((struct nfsrv_descript *,int,struct mbuf *)); int nfs_connect __P((struct nfsmount *,struct nfsreq *)); void nfs_disconnect __P((struct nfsmount *nmp)); int nfs_getattrcache __P((struct vnode *,struct vattr *)); int nfsm_strtmbuf __P((struct mbuf **,char **,char *,long)); int nfs_bioread __P((struct vnode *,struct uio *,int,struct ucred *)); int nfsm_uiotombuf __P((struct uio *,struct mbuf **,int,caddr_t *)); void nfsrv_init 
__P((int)); void nfs_clearcommit __P((struct mount *)); int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrvw_sort __P((gid_t [],int)); void nfsrv_setcred __P((struct ucred *,struct ucred *)); int nfs_writebp __P((struct buf *,int)); -int nfsrv_vput __P(( struct vnode * )); -int nfsrv_vrele __P(( struct vnode * )); -int nfsrv_vmio __P(( struct vnode * )); +int nfsrv_object_create __P(( struct vnode * )); void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, struct proc *, struct mbuf **)); int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, struct proc *p)); int nfsrv3_access __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_fhtovp __P((fhandle_t *,int,struct vnode **, struct ucred *,struct nfssvc_sock *,struct mbuf *, int *,int)); int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_setattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_statfs __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_symlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); 
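/*
 * Every per-procedure server entry point above shares this one
 * signature, which lets the nfsd loop dispatch through a table indexed
 * by the request's nd_procnum.  A minimal sketch of such a table; the
 * array name here is illustrative, not something this header declares,
 * and only the first few procedures are shown:
 *
 *	static int (*example_nfsrv_procs[NFS_NPROCS]) __P((struct nfsrv_descript *,
 *	    struct nfssvc_sock *, struct proc *, struct mbuf **)) = {
 *		nfsrv_null,
 *		nfsrv_getattr,
 *		nfsrv_setattr,
 *		nfsrv_lookup,
 *		...
 *	};
 *
 *	error = (*example_nfsrv_procs[nd->nd_procnum])(nd, slp, procp, &mreq);
 */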
#endif /* KERNEL */ #endif Index: head/sys/nfs/nfs_common.c =================================================================== --- head/sys/nfs/nfs_common.c (revision 17760) +++ head/sys/nfs/nfs_common.c (revision 17761) @@ -1,1992 +1,1949 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.30 1996/06/23 17:19:25 bde Exp $ + * $Id: nfs_subs.c,v 1.31 1996/07/16 10:19:44 dfr Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. 
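 * (An mbuf chain is a linked list of fixed-size kernel buffers joined
 * through m_next; a uio describes a scatter/gather array of iovecs in
 * user or kernel space.  The copy helpers below walk the two in
 * lockstep, moving min(bytes left in the current mbuf, bytes left in
 * the current iovec) on each pass.)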
*/ #include #include #include #include #include #include #include #include #include #include #include #ifdef VFS_LKM #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_long nfs_xdrneg1; u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_long nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; #ifndef NFS_NOSERVER /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, }; /* * Maps errno values to nfs error numbers. 
* Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. */ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, 
NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; #ifdef VFS_LKM struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); #endif LIST_HEAD(nfsnodehashhead, nfsnode); /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. 
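 * The header built here is the standard ONC RPC call preamble (RFC
 * 1057), each field a 4-byte XDR word:
 *
 *	xid, CALL, RPC version, program, program version, procedure,
 *	auth flavor, auth length, <auth body, zero-padded>,
 *	verifier flavor, verifier length, <verifier body, zero-padded>
 *
 * The first six words plus the two auth words account for the
 * "8 * NFSX_UNSIGNED" build in the code, and the verifier
 * flavor/length pair for the later "2 * NFSX_UNSIGNED" build.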
*/ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_long *xidp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; struct timeval tv; static u_long base; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. */ nfsm_build(tl, u_long *, 8 * NFSX_UNSIGNED); /* * derive initial xid from system time * XXX time is invalid if root not yet mounted */ if (!base && (rootvp)) { microtime(&tv); base = tv.tv_sec << 12; nfs_xid = base; } /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_long *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... 
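 * (XDR rounds opaque data up to a 4-byte boundary, which is what the
 * nfsm_rndup(len) - len zero-fill loops above and below provide; a
 * 5-byte Kerberos verifier, for example, is followed by 3 NUL bytes.)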
*/ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can only handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet..
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... 
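 * (An XDR string goes out as a 4-byte length, the bytes themselves,
 * and zero padding to a 4-byte boundary, so "abcde" becomes the twelve
 * bytes 00 00 00 05 61 62 63 64 65 00 00 00.  The loop below spills
 * that encoding across as many mbufs as it takes.)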
*/ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; char *cp; long siz; { register struct mbuf *m1 = 0, *m2; long left, xfer, len, tlen; u_long *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_long *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_long *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init() { register int i; /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfsnode) > NFS_NODEALLOC) { printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC); printf("Try reducing NFS_SMALLFH\n"); } if (sizeof (struct nfsmount) > NFS_MNTALLOC) { printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC); printf("Try reducing NFS_MUIDHASHSIZ\n"); } if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) nfs_iodwant[i] = (struct proc *)0; TAILQ_INIT(&nfs_bufq); nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. */ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); #ifdef __FreeBSD__ /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. 
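 * (The kernel proper reaches NFS only through the lease_check and
 * lease_updatetime function pointers, so a module loaded at run time
 * can splice itself in here instead of being linked statically;
 * bumping vfc_refcount below then keeps the module resident while
 * those hooks point into it.)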
*/ #ifndef NFS_NOSERVER lease_check = nfs_lease_check; #endif lease_updatetime = nfs_lease_updatetime; vfsconf[MOUNT_NFS]->vfc_refcount++; /* make us non-unloadable */ #ifdef VFS_LKM sysent[SYS_nfssvc].sy_narg = 2; sysent[SYS_nfssvc].sy_call = nfssvc; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = 2; sysent[SYS_getfh].sy_call = getfh; #endif #endif #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register struct nfsnodehashhead *nhpp; register long t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if (error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intends to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). * * NFSv3 settled the issue, and requires fa_mode to not * contain any type information (while also introducing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(long, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { /* * If we had a lock and it turns out that the vnode * is an object which we don't want to lock (e.g. VDIR) * to avoid nasty hanging problems on a server crash, * then release it here. */ if (vtyp != VREG && VOP_ISLOCKED(vp)) VOP_UNLOCK(vp); vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, (dev_t)rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode.
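 * (checkalias() found an existing vnode for this device, making the
 * freshly created NFS vnode redundant: hand its nfsnode over to the
 * alias, switch to the spec vnops, rehash the nfsnode under the new
 * vnode, and let the old one be reclaimed via vrele()/vgone().)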
*/ LIST_REMOVE(np, n_hash); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; nhpp = NFSNOHASH(nfs_hash(np->n_fhp, np->n_fhsize)); LIST_INSERT_HEAD(nhpp, np, n_hash); *vpp = vp = nvp; } } np->n_mtime = mtime.ts_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = (dev_t)rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); fxdr_hyper(&fp->fa3_size, &vap->va_size); vap->va_blocksize = NFS_FABLKSIZE; fxdr_hyper(&fp->fa3_used, &vap->va_bytes); vap->va_fileid = fxdr_unsigned(int, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_long, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); vap->va_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(long, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.ts_nsec = 0; vap->va_gen = fxdr_unsigned(u_long, fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time.tv_sec; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np = VTONFS(vp); register struct vattr *vap; if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; vap = &np->n_vattr; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a lookup() call and do it */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct mbuf *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp; struct vnode *dp; int error, rdonly; struct 
componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || *fromcp == '/') { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if (error = nfs_adv(mdp, dposp, len, rem)) goto out; } ndp->ni_pathlen = tocp - cnp->cn_pnbuf; cnp->cn_nameptr = cnp->cn_pnbuf; /* * Extract and set starting directory. */ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag)) goto out; if (dp->v_type != VDIR) { - nfsrv_vrele(dp); + vrele(dp); error = ENOTDIR; goto out; } VREF(dp); *retdirp = dp; ndp->ni_startdir = dp; if (rdonly) cnp->cn_flags |= (NOCROSSMOUNT | RDONLY); else cnp->cn_flags |= NOCROSSMOUNT; /* * And call lookup() to do the real work */ cnp->cn_proc = p; if (error = lookup(ndp)) goto out; /* * Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); vput(ndp->ni_vp); ndp->ni_vp = NULL; error = EINVAL; goto out; } - nfsrv_vmio(ndp->ni_vp); + nfsrv_object_create(ndp->ni_vp); /* * Check for saved name request */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; return (0); } out: FREE(cnp->cn_pnbuf, M_NAMEI); return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
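 * nfsm_srvwcc() emits NFSv3 "weak cache consistency" data: an optional
 * pre-operation triple of (size, mtime, ctime) followed by optional
 * post-operation attributes, each half prefixed by a TRUE/FALSE
 * discriminator.  The pre-op half is one boolean word plus three
 * 8-byte quantities, hence the 7 * NFSX_UNSIGNED build in the code.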
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; if (before_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&(before_vap->va_size), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(&vap->va_size, &fp->fa3_size); txdr_hyper(&vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct mbuf *nam; int *rdonlyp; int kerbflag; { register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; *vpp = (struct vnode *)0; mp = getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); /* * 
Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; - nfsrv_vmio(*vpp); + nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp); return (0); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct mbuf *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = mtod(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = mtod(nam, struct sockaddr_iso *); isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { 0, 0 }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. 
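 * (Background: an NFSv3 client may issue UNSTABLE writes and COMMIT
 * them later; B_NEEDCOMMIT marks buffers written to the server but
 * not yet committed to stable storage.  A changed verifier means the
 * server may have rebooted and lost those writes, so they must be
 * rewritten rather than committed.)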
Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int -nfsrv_vmio(struct vnode *vp) { - vm_object_t object; +nfsrv_object_create(struct vnode *vp) { if ((vp == NULL) || (vp->v_type != VREG)) return 1; - -retry: - if ((vp->v_flag & VVMIO) == 0) { - struct vattr vat; - struct proc *p = curproc; - - if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) - panic("nfsrv_vmio: VOP_GETATTR failed"); - - (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - - vp->v_flag |= VVMIO; - } else { - if ((object = vp->v_object) && - (object->flags & OBJ_DEAD)) { - tsleep(object, PVM, "nfdead", 0); - goto retry; - } - if (!object) - panic("nfsrv_vmio: VMIO object missing"); - vm_object_reference(object); - } - return 0; -} -int -nfsrv_vput(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vput(vp); - vm_object_deallocate(vp->v_object); - } else { - vput(vp); - } - return 0; -} -int -nfsrv_vrele(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vrele(vp); - vm_object_deallocate(vp->v_object); - } else { - vrele(vp); - } - return 0; + return vfs_object_create(vp, curproc, curproc?curproc->p_ucred:NULL, 1); } #endif /* NFS_NOSERVER */ Index: head/sys/nfs/nfs_nqlease.c =================================================================== --- head/sys/nfs/nfs_nqlease.c (revision 17760) +++ head/sys/nfs/nfs_nqlease.c (revision 17761) @@ -1,1283 +1,1283 @@ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_nqlease.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_nqlease.c,v 1.18 1995/12/17 21:12:16 phk Exp $ + * $Id: nfs_nqlease.c,v 1.19 1996/01/13 23:27:43 phk Exp $ */ /* * References: * Cary G. Gray and David R. Cheriton, "Leases: An Efficient Fault-Tolerant * Mechanism for Distributed File Cache Consistency", * In Proc. of the Twelfth ACM Symposium on Operating Systems * Principals, pg. 202-210, Litchfield Park, AZ, Dec. 1989. * Michael N. Nelson, Brent B. Welch and John K. Ousterhout, "Caching * in the Sprite Network File System", ACM TOCS 6(1), * pages 134-154, February 1988. * V. Srinivasan and Jeffrey C. Mogul, "Spritely NFS: Implementation and * Performance of Cache-Consistency Protocols", Digital * Equipment Corporation WRL Research Report 89/5, May 1989. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include time_t nqnfsstarttime = (time_t)0; int nqsrv_clockskew = NQ_CLOCKSKEW; int nqsrv_writeslack = NQ_WRITESLACK; int nqsrv_maxlease = NQ_MAXLEASE; static int nqsrv_maxnumlease = NQ_MAXNUMLEASE; struct vop_lease_args; static int nqsrv_cmpnam __P((struct nfssvc_sock *,struct mbuf *, struct nqhost *)); extern void nqnfs_lease_check __P((struct vnode *vp, struct proc *p, struct ucred *cred, int flag)); extern void nqnfs_lease_updatetime __P((int deltat)); static int nqnfs_vacated __P((struct vnode *vp, struct ucred *cred)); extern int nqnfs_vop_lease_check __P((struct vop_lease_args *ap)); static void nqsrv_addhost __P((struct nqhost *lph, struct nfssvc_sock *slp, struct mbuf *nam)); static void nqsrv_instimeq __P((struct nqlease *lp, u_long duration)); static void nqsrv_locklease __P((struct nqlease *lp)); static void nqsrv_send_eviction __P((struct vnode *vp, struct nqlease *lp, struct nfssvc_sock *slp, struct mbuf *nam, struct ucred *cred)); static void nqsrv_unlocklease __P((struct nqlease *lp)); static void nqsrv_waitfor_expiry __P((struct nqlease *lp)); /* * Signifies which rpcs can have piggybacked lease requests */ int nqnfs_piggy[NFS_NPROCS] = { 0, 0, ND_WRITE, ND_READ, 0, ND_READ, ND_READ, ND_WRITE, 0, 0, 0, 0, 0, 0, 0, 0, ND_READ, ND_READ, 0, 0, 0, 0, 0, 0, 0, 0, }; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; extern int nfsd_waiting; extern struct nfsstats nfsstats; #define TRUE 1 #define FALSE 0 #ifndef NFS_NOSERVER /* * Get or check for a lease for "vp", based on ND_CHECK flag. 
* The rules are as follows: * - if a current non-caching lease, reply non-caching * - if a current lease for same host only, extend lease * - if a read cachable lease and a read lease request * add host to list and reply cachable * - else { set non-cachable for read-write sharing } * send eviction notice messages to all other hosts that have lease * wait for lease termination { either by receiving vacated messages * from all the other hosts or expiry * via timeout } * modify lease to non-cachable * - else if no current lease, issue new one * - reply * - return boolean TRUE iff nam should be m_freem()'d * NB: Since nqnfs_serverd() is called from a timer, any potential tsleep() * in here must be framed by nqsrv_locklease() and nqsrv_unlocklease(). * nqsrv_locklease() is coded such that at least one of LC_LOCKED and * LC_WANTED is set whenever a process is tsleeping in it. The exception * is when a new lease is being allocated, since it is not in the timer * queue yet. (Ditto for the splsoftclock() and splx(s) calls) */ int nqsrv_getlease(vp, duration, flags, slp, procp, nam, cachablep, frev, cred) struct vnode *vp; u_long *duration; int flags; struct nfssvc_sock *slp; struct proc *procp; struct mbuf *nam; int *cachablep; u_quad_t *frev; struct ucred *cred; { register struct nqlease *lp; register struct nqfhhashhead *lpp = 0; register struct nqhost *lph = 0; struct nqlease *tlp; struct nqm **lphp; struct vattr vattr; fhandle_t fh; int i, ok, error, s; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (0); if (*duration > nqsrv_maxlease) *duration = nqsrv_maxlease; error = VOP_GETATTR(vp, &vattr, cred, procp); if (error) return (error); *frev = vattr.va_filerev; s = splsoftclock(); tlp = vp->v_lease; if ((flags & ND_CHECK) == 0) nfsstats.srvnqnfs_getleases++; if (tlp == (struct nqlease *)0) { /* * Find the lease by searching the hash list.
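 * The v_lease hint consulted above is only a soft reference (see the
 * matching caveat in nqnfs_serverd() where it is cleared): when it is
 * NULL the lease, if one exists, is recovered from the fid-keyed hash
 * chain below and the soft reference is re-established.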
*/ fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); if (error) { splx(s); return (error); } lpp = NQFHHASH(fh.fh_fid.fid_data); for (lp = lpp->lh_first; lp != 0; lp = lp->lc_hash.le_next) if (fh.fh_fsid.val[0] == lp->lc_fsid.val[0] && fh.fh_fsid.val[1] == lp->lc_fsid.val[1] && !bcmp(fh.fh_fid.fid_data, lp->lc_fiddata, fh.fh_fid.fid_len - sizeof (long))) { /* Found it */ lp->lc_vp = vp; vp->v_lease = lp; tlp = lp; break; } } else lp = tlp; if (lp) { if ((lp->lc_flag & LC_NONCACHABLE) || (lp->lc_morehosts == (struct nqm *)0 && nqsrv_cmpnam(slp, nam, &lp->lc_host))) goto doreply; if ((flags & ND_READ) && (lp->lc_flag & LC_WRITE) == 0) { if (flags & ND_CHECK) goto doreply; if (nqsrv_cmpnam(slp, nam, &lp->lc_host)) goto doreply; i = 0; if (lp->lc_morehosts) { lph = lp->lc_morehosts->lpm_hosts; lphp = &lp->lc_morehosts->lpm_next; ok = 1; } else { lphp = &lp->lc_morehosts; ok = 0; } while (ok && (lph->lph_flag & LC_VALID)) { if (nqsrv_cmpnam(slp, nam, lph)) goto doreply; if (++i == LC_MOREHOSTSIZ) { i = 0; if (*lphp) { lph = (*lphp)->lpm_hosts; lphp = &((*lphp)->lpm_next); } else ok = 0; } else lph++; } nqsrv_locklease(lp); if (!ok) { *lphp = (struct nqm *) malloc(sizeof (struct nqm), M_NQMHOST, M_WAITOK); bzero((caddr_t)*lphp, sizeof (struct nqm)); lph = (*lphp)->lpm_hosts; } nqsrv_addhost(lph, slp, nam); nqsrv_unlocklease(lp); } else { lp->lc_flag |= LC_NONCACHABLE; nqsrv_locklease(lp); nqsrv_send_eviction(vp, lp, slp, nam, cred); nqsrv_waitfor_expiry(lp); nqsrv_unlocklease(lp); } doreply: /* * Update the lease and return */ if ((flags & ND_CHECK) == 0) nqsrv_instimeq(lp, *duration); if (lp->lc_flag & LC_NONCACHABLE) *cachablep = 0; else { *cachablep = 1; if (flags & ND_WRITE) lp->lc_flag |= LC_WRITTEN; } splx(s); return (0); } splx(s); if (flags & ND_CHECK) return (0); /* * Allocate new lease * The value of nqsrv_maxnumlease should be set generously, so that * the following "printf" happens infrequently. */ if (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease) { printf("Nqnfs server, too many leases\n"); do { (void) tsleep((caddr_t)&lbolt, PSOCK, "nqsrvnuml", 0); } while (nfsstats.srvnqnfs_leases > nqsrv_maxnumlease); } MALLOC(lp, struct nqlease *, sizeof (struct nqlease), M_NQLEASE, M_WAITOK); bzero((caddr_t)lp, sizeof (struct nqlease)); if (flags & ND_WRITE) lp->lc_flag |= (LC_WRITE | LC_WRITTEN); nqsrv_addhost(&lp->lc_host, slp, nam); lp->lc_vp = vp; lp->lc_fsid = fh.fh_fsid; bcopy(fh.fh_fid.fid_data, lp->lc_fiddata, fh.fh_fid.fid_len - sizeof (long)); if(!lpp) panic("nfs_nqlease.c: Phoney lpp"); LIST_INSERT_HEAD(lpp, lp, lc_hash); vp->v_lease = lp; s = splsoftclock(); nqsrv_instimeq(lp, *duration); splx(s); *cachablep = 1; if (++nfsstats.srvnqnfs_leases > nfsstats.srvnqnfs_maxleases) nfsstats.srvnqnfs_maxleases = nfsstats.srvnqnfs_leases; return (0); } /* * Local lease check for server syscalls. * Just set up args and let nqsrv_getlease() do the rest. * nqnfs_vop_lease_check() is the VOP_LEASE() form of the same routine. * Ifdef'd code in nfsnode.h renames these routines to whatever a particular * OS needs. 
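 * A server syscall path is expected to call it as no more than (a
 * minimal sketch; vp, p and cred come from the surrounding syscall,
 * and systems with VOP_LEASE() go through the VOP form below instead):
 *
 *	nqnfs_lease_check(vp, p, cred, ND_WRITE);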
*/ void nqnfs_lease_check(vp, p, cred, flag) struct vnode *vp; struct proc *p; struct ucred *cred; int flag; { u_long duration = 0; int cache; u_quad_t frev; (void) nqsrv_getlease(vp, &duration, ND_CHECK | flag, NQLOCALSLP, p, (struct mbuf *)0, &cache, &frev, cred); } #endif /* NFS_NOSERVER */ #ifdef HAS_VOPLEASE int nqnfs_vop_lease_check(ap) struct vop_lease_args /* { struct vnode *a_vp; struct proc *a_p; struct ucred *a_cred; int a_flag; } */ *ap; { u_long duration = 0; int cache; u_quad_t frev; (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag, NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred); return (0); } #endif /* * Add a host to an nqhost structure for a lease. */ static void nqsrv_addhost(lph, slp, nam) register struct nqhost *lph; struct nfssvc_sock *slp; struct mbuf *nam; { register struct sockaddr_in *saddr; if (slp == NQLOCALSLP) lph->lph_flag |= (LC_VALID | LC_LOCAL); else if (slp == nfs_udpsock) { saddr = mtod(nam, struct sockaddr_in *); lph->lph_flag |= (LC_VALID | LC_UDP); lph->lph_inetaddr = saddr->sin_addr.s_addr; lph->lph_port = saddr->sin_port; } else if (slp == nfs_cltpsock) { lph->lph_nam = m_copym(nam, 0, M_COPYALL, M_WAIT); lph->lph_flag |= (LC_VALID | LC_CLTP); } else { lph->lph_flag |= (LC_VALID | LC_SREF); lph->lph_slp = slp; slp->ns_sref++; } } /* * Update the lease expiry time and position it in the timer queue correctly. */ static void nqsrv_instimeq(lp, duration) register struct nqlease *lp; u_long duration; { register struct nqlease *tlp; time_t newexpiry; newexpiry = time.tv_sec + duration + nqsrv_clockskew; if (lp->lc_expiry == newexpiry) return; if (lp->lc_timer.cqe_next != 0) { CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); } lp->lc_expiry = newexpiry; /* * Find where in the queue it should be. */ tlp = nqtimerhead.cqh_last; while (tlp != (void *)&nqtimerhead && tlp->lc_expiry > newexpiry) tlp = tlp->lc_timer.cqe_prev; #ifdef HASNVRAM if (tlp == nqtimerhead.cqh_last) NQSTORENOVRAM(newexpiry); #endif /* HASNVRAM */ if (tlp == (void *)&nqtimerhead) { CIRCLEQ_INSERT_HEAD(&nqtimerhead, lp, lc_timer); } else { CIRCLEQ_INSERT_AFTER(&nqtimerhead, tlp, lp, lc_timer); } } /* * Compare the requesting host address with the lph entry in the lease. * Return true iff it is the same. * This is somewhat messy due to the union in the nqhost structure. * The local host is indicated by the special value of NQLOCALSLP for slp. */ static int nqsrv_cmpnam(slp, nam, lph) register struct nfssvc_sock *slp; struct mbuf *nam; register struct nqhost *lph; { register struct sockaddr_in *saddr; struct mbuf *addr; union nethostaddr lhaddr; int ret; if (slp == NQLOCALSLP) { if (lph->lph_flag & LC_LOCAL) return (1); else return (0); } if (slp == nfs_udpsock || slp == nfs_cltpsock) addr = nam; else addr = slp->ns_nam; if (lph->lph_flag & LC_UDP) ret = netaddr_match(AF_INET, &lph->lph_haddr, addr); else if (lph->lph_flag & LC_CLTP) ret = netaddr_match(AF_ISO, &lph->lph_claddr, addr); else { if ((lph->lph_slp->ns_flag & SLP_VALID) == 0) return (0); saddr = mtod(lph->lph_slp->ns_nam, struct sockaddr_in *); if (saddr->sin_family == AF_INET) lhaddr.had_inetaddr = saddr->sin_addr.s_addr; else lhaddr.had_nam = lph->lph_slp->ns_nam; ret = netaddr_match(saddr->sin_family, &lhaddr, addr); } return (ret); } /* * Send out eviction notice messages to all other hosts for the lease. 
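 * The holder list walked below has the same shape used throughout this
 * file: the first host sits inline in lc_host, and any overflow lives
 * in chained nqm blocks of LC_MOREHOSTSIZ entries each:
 *
 *	lc_host, then lc_morehosts->lpm_hosts[0..LC_MOREHOSTSIZ-1],
 *	then lpm_next->lpm_hosts[], and so on.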
*/ static void nqsrv_send_eviction(vp, lp, slp, nam, cred) struct vnode *vp; register struct nqlease *lp; struct nfssvc_sock *slp; struct mbuf *nam; struct ucred *cred; { register struct nqhost *lph = &lp->lc_host; register struct mbuf *m; register int siz; struct nqm *lphnext = lp->lc_morehosts; struct mbuf *mreq, *mb, *mb2, *nam2, *mheadend; struct socket *so; struct sockaddr_in *saddr; fhandle_t *fhp; caddr_t bpos, cp; u_long xid; int len = 1, ok = 1, i = 0; int sotype, *solockp; while (ok && (lph->lph_flag & LC_VALID)) { if (nqsrv_cmpnam(slp, nam, lph)) lph->lph_flag |= LC_VACATED; else if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { if (lph->lph_flag & LC_UDP) { MGET(nam2, M_WAIT, MT_SONAME); saddr = mtod(nam2, struct sockaddr_in *); nam2->m_len = saddr->sin_len = sizeof (struct sockaddr_in); saddr->sin_family = AF_INET; saddr->sin_addr.s_addr = lph->lph_inetaddr; saddr->sin_port = lph->lph_port; so = nfs_udpsock->ns_so; } else if (lph->lph_flag & LC_CLTP) { nam2 = lph->lph_nam; so = nfs_cltpsock->ns_so; } else if (lph->lph_slp->ns_flag & SLP_VALID) { nam2 = (struct mbuf *)0; so = lph->lph_slp->ns_so; } else goto nextone; sotype = so->so_type; if (so->so_proto->pr_flags & PR_CONNREQUIRED) solockp = &lph->lph_slp->ns_solock; else solockp = (int *)0; nfsm_reqhead((struct vnode *)0, NQNFSPROC_EVICTED, NFSX_V3FH); nfsm_build(cp, caddr_t, NFSX_V3FH); bzero(cp, NFSX_V3FH); fhp = (fhandle_t *)cp; fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; VFS_VPTOFH(vp, &fhp->fh_fid); m = mreq; siz = 0; while (m) { siz += m->m_len; m = m->m_next; } if (siz <= 0 || siz > NFS_MAXPACKET) { printf("mbuf siz=%d\n",siz); panic("Bad nfs svc reply"); } m = nfsm_rpchead(cred, (NFSMNT_NFSV3 | NFSMNT_NQNFS), NQNFSPROC_EVICTED, RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0, 0, (char *)NULL, mreq, siz, &mheadend, &xid); /* * For stream protocols, prepend a Sun RPC * Record Mark. */ if (sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } if (((lph->lph_flag & (LC_UDP | LC_CLTP)) == 0 && (lph->lph_slp->ns_flag & SLP_VALID) == 0) || (solockp && (*solockp & NFSMNT_SNDLOCK))) m_freem(m); else { if (solockp) *solockp |= NFSMNT_SNDLOCK; (void) nfs_send(so, nam2, m, (struct nfsreq *)0); if (solockp) nfs_sndunlock(solockp); } if (lph->lph_flag & LC_UDP) MFREE(nam2, m); } nextone: if (++i == len) { if (lphnext) { i = 0; len = LC_MOREHOSTSIZ; lph = lphnext->lpm_hosts; lphnext = lphnext->lpm_next; } else ok = 0; } else lph++; } } /* * Wait for the lease to expire. * This will occur when all clients have sent "vacated" messages to * this server OR when it expires do to timeout. */ static void nqsrv_waitfor_expiry(lp) register struct nqlease *lp; { register struct nqhost *lph; register int i; struct nqm *lphnext; int len, ok; tryagain: if (time.tv_sec > lp->lc_expiry) return; lph = &lp->lc_host; lphnext = lp->lc_morehosts; len = 1; i = 0; ok = 1; while (ok && (lph->lph_flag & LC_VALID)) { if ((lph->lph_flag & (LC_LOCAL | LC_VACATED)) == 0) { lp->lc_flag |= LC_EXPIREDWANTED; (void) tsleep((caddr_t)&lp->lc_flag, PSOCK, "nqexp", 0); goto tryagain; } if (++i == len) { if (lphnext) { i = 0; len = LC_MOREHOSTSIZ; lph = lphnext->lpm_hosts; lphnext = lphnext->lpm_next; } else ok = 0; } else lph++; } } #ifndef NFS_NOSERVER /* * Nqnfs server timer that maintains the server lease queue. 
* Scan the lease queue for expired entries: * - when one is found, wakeup anyone waiting for it * else dequeue and free */ void nqnfs_serverd() { register struct nqlease *lp; register struct nqhost *lph; struct nqlease *nextlp; struct nqm *lphnext, *olphnext; struct mbuf *n; int i, len, ok; for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; lp = nextlp) { if (lp->lc_expiry >= time.tv_sec) break; nextlp = lp->lc_timer.cqe_next; if (lp->lc_flag & LC_EXPIREDWANTED) { lp->lc_flag &= ~LC_EXPIREDWANTED; wakeup((caddr_t)&lp->lc_flag); } else if ((lp->lc_flag & (LC_LOCKED | LC_WANTED)) == 0) { /* * Make a best effort at keeping a write caching lease long * enough by not deleting it until it has been explicitly * vacated or there have been no writes in the previous * write_slack seconds since expiry and the nfsds are not * all busy. The assumption is that if the nfsds are not * all busy now (no queue of nfs requests), then the client * would have been able to do at least one write to the * file during the last write_slack seconds if it was still * trying to push writes to the server. */ if ((lp->lc_flag & (LC_WRITE | LC_VACATED)) == LC_WRITE && ((lp->lc_flag & LC_WRITTEN) || nfsd_waiting == 0)) { lp->lc_flag &= ~LC_WRITTEN; nqsrv_instimeq(lp, nqsrv_writeslack); } else { CIRCLEQ_REMOVE(&nqtimerhead, lp, lc_timer); LIST_REMOVE(lp, lc_hash); /* * This soft reference may no longer be valid, but * no harm done. The worst case is if the vnode was * recycled and has another valid lease reference, * which is dereferenced prematurely. */ lp->lc_vp->v_lease = (struct nqlease *)0; lph = &lp->lc_host; lphnext = lp->lc_morehosts; olphnext = (struct nqm *)0; len = 1; i = 0; ok = 1; while (ok && (lph->lph_flag & LC_VALID)) { if (lph->lph_flag & LC_CLTP) MFREE(lph->lph_nam, n); if (lph->lph_flag & LC_SREF) nfsrv_slpderef(lph->lph_slp); if (++i == len) { if (olphnext) { free((caddr_t)olphnext, M_NQMHOST); olphnext = (struct nqm *)0; } if (lphnext) { olphnext = lphnext; i = 0; len = LC_MOREHOSTSIZ; lph = lphnext->lpm_hosts; lphnext = lphnext->lpm_next; } else ok = 0; } else lph++; } FREE((caddr_t)lp, M_NQLEASE); if (olphnext) free((caddr_t)olphnext, M_NQMHOST); nfsstats.srvnqnfs_leases--; } } } } /* * Called from nfssvc_nfsd() for a getlease rpc request. * Do the from/to xdr translation and call nqsrv_getlease() to * do the real work. 
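 * After the file handle the request carries two words, and a
 * successful reply carries four words plus the attributes:
 *
 *	request:  flags, duration requested
 *	reply:	  cachable, duration granted, frev (64 bits), fattr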
*/ int nqnfsrv_getlease(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va; register struct vattr *vap = &va; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; u_quad_t frev; caddr_t bpos; int error = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; int flags, rdonly, cache; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); flags = fxdr_unsigned(int, *tl++); nfsd->nd_duration = fxdr_unsigned(int, *tl); error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH)); if (error) nfsm_reply(0); if (rdonly && flags == ND_WRITE) { error = EROFS; - nfsrv_vput(vp); + vput(vp); nfsm_reply(0); } (void) nqsrv_getlease(vp, &nfsd->nd_duration, flags, slp, procp, nam, &cache, &frev, cred); error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3FATTR + 4 * NFSX_UNSIGNED); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(cache); *tl++ = txdr_unsigned(nfsd->nd_duration); txdr_hyper(&frev, tl); nfsm_build(fp, struct nfs_fattr *, NFSX_V3FATTR); nfsm_srvfillattr(vap, fp); nfsm_srvdone; } /* * Called from nfssvc_nfsd() when a "vacated" message is received from a * client. Find the entry and expire it. */ int nqnfsrv_vacated(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; register struct nqlease *lp; register struct nqhost *lph; struct nqlease *tlp = (struct nqlease *)0; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; struct nqm *lphnext; struct mbuf *mreq, *mb; int error = 0, i, len, ok, gotit = 0, cache = 0; char *cp2, *bpos; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); m_freem(mrep); /* * Find the lease by searching the hash list. */ for (lp = NQFHHASH(fhp->fh_fid.fid_data)->lh_first; lp != 0; lp = lp->lc_hash.le_next) if (fhp->fh_fsid.val[0] == lp->lc_fsid.val[0] && fhp->fh_fsid.val[1] == lp->lc_fsid.val[1] && !bcmp(fhp->fh_fid.fid_data, lp->lc_fiddata, MAXFIDSZ)) { /* Found it */ tlp = lp; break; } if (tlp) { lp = tlp; len = 1; i = 0; lph = &lp->lc_host; lphnext = lp->lc_morehosts; ok = 1; while (ok && (lph->lph_flag & LC_VALID)) { if (nqsrv_cmpnam(slp, nam, lph)) { lph->lph_flag |= LC_VACATED; gotit++; break; } if (++i == len) { if (lphnext) { len = LC_MOREHOSTSIZ; i = 0; lph = lphnext->lpm_hosts; lphnext = lphnext->lpm_next; } else ok = 0; } else lph++; } if ((lp->lc_flag & LC_EXPIREDWANTED) && gotit) { lp->lc_flag &= ~LC_EXPIREDWANTED; wakeup((caddr_t)&lp->lc_flag); } nfsmout: return (EPERM); } return (EPERM); } #endif /* NFS_NOSERVER */ /* * Client get lease rpc function. 
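 * Note that reqtime is sampled before the rpc goes out and the granted
 * duration is applied to that sample, so a lease whose rpc took longer
 * than the term being granted is treated as NQNFS_EXPIRED on arrival
 * rather than cached stale.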
*/ int nqnfs_getlease(vp, rwflag, cred, p) register struct vnode *vp; int rwflag; struct ucred *cred; struct proc *p; { register u_long *tl; register caddr_t cp; register long t1, t2; register struct nfsnode *np; struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; time_t reqtime; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int cachable; u_quad_t frev; nfsstats.rpccnt[NQNFSPROC_GETLEASE]++; mb = mreq = nfsm_reqh(vp, NQNFSPROC_GETLEASE, NFSX_V3FH+2*NFSX_UNSIGNED, &bpos); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(rwflag); *tl = txdr_unsigned(nmp->nm_leaseterm); reqtime = time.tv_sec; nfsm_request(vp, NQNFSPROC_GETLEASE, p, cred); np = VTONFS(vp); nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); cachable = fxdr_unsigned(int, *tl++); reqtime += fxdr_unsigned(int, *tl++); if (reqtime > time.tv_sec) { fxdr_hyper(tl, &frev); nqnfs_clientlease(nmp, np, rwflag, cachable, reqtime, frev); nfsm_loadattr(vp, (struct vattr *)0); } else error = NQNFS_EXPIRED; nfsm_reqdone; return (error); } /* * Client vacated message function. */ static int nqnfs_vacated(vp, cred) register struct vnode *vp; struct ucred *cred; { register caddr_t cp; register struct mbuf *m; register int i; register u_long *tl; register long t2; caddr_t bpos; u_long xid; int error = 0; struct mbuf *mreq, *mb, *mb2, *mheadend; struct nfsmount *nmp; struct nfsreq myrep; nmp = VFSTONFS(vp->v_mount); nfsstats.rpccnt[NQNFSPROC_VACATED]++; nfsm_reqhead(vp, NQNFSPROC_VACATED, NFSX_V3FH); nfsm_fhtom(vp, 1); m = mreq; i = 0; while (m) { i += m->m_len; m = m->m_next; } m = nfsm_rpchead(cred, nmp->nm_flag, NQNFSPROC_VACATED, RPCAUTH_UNIX, 5 * NFSX_UNSIGNED, (char *)0, 0, (char *)NULL, mreq, i, &mheadend, &xid); if (nmp->nm_sotype == SOCK_STREAM) { M_PREPEND(m, NFSX_UNSIGNED, M_WAIT); *mtod(m, u_long *) = htonl(0x80000000 | (m->m_pkthdr.len - NFSX_UNSIGNED)); } myrep.r_flags = 0; myrep.r_nmp = nmp; if (nmp->nm_soflags & PR_CONNREQUIRED) (void) nfs_sndlock(&nmp->nm_flag, (struct nfsreq *)0); (void) nfs_send(nmp->nm_so, nmp->nm_nam, m, &myrep); if (nmp->nm_soflags & PR_CONNREQUIRED) nfs_sndunlock(&nmp->nm_flag); nfsmout: return (error); } #ifndef NFS_NOSERVER /* * Called for client side callbacks */ int nqnfs_callback(nmp, mrep, md, dpos) struct nfsmount *nmp; struct mbuf *mrep, *md; caddr_t dpos; { register struct vnode *vp; register u_long *tl; register long t1; nfsfh_t nfh; fhandle_t *fhp; struct nfsnode *np; struct nfsd tnfsd; struct nfssvc_sock *slp; struct nfsrv_descript ndesc; register struct nfsrv_descript *nfsd = &ndesc; struct mbuf **mrq = (struct mbuf **)0, *mb, *mreq; int error = 0, cache = 0; char *cp2, *bpos; u_quad_t frev; #ifndef nolint slp = NULL; #endif nfsd->nd_mrep = mrep; nfsd->nd_md = md; nfsd->nd_dpos = dpos; error = nfs_getreq(nfsd, &tnfsd, FALSE); if (error) return (error); md = nfsd->nd_md; dpos = nfsd->nd_dpos; if (nfsd->nd_procnum != NQNFSPROC_EVICTED) { m_freem(mrep); return (EPERM); } fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); m_freem(mrep); error = nfs_nget(nmp->nm_mountp, (nfsfh_t *)fhp, NFSX_V3FH, &np); if (error) return (error); vp = NFSTOV(np); if (np->n_timer.cqe_next != 0) { np->n_expiry = 0; np->n_flag |= NQNFSEVICTED; if (nmp->nm_timerhead.cqh_first != np) { CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); } } vput(vp); nfsm_srvdone; } /* * Nqnfs client helper daemon. Runs once a second to expire leases. * It also get authorization strings for "kerb" mounts. 
* It must start at the beginning of the list again after any potential * "sleep" since nfs_reclaim() called from vclean() can pull a node off * the list asynchronously. */ int nqnfs_clientd(nmp, cred, ncd, flag, argp, p) register struct nfsmount *nmp; struct ucred *cred; struct nfsd_cargs *ncd; int flag; caddr_t argp; struct proc *p; { register struct nfsnode *np; struct vnode *vp; struct nfsreq myrep; struct nfsuid *nuidp, *nnuidp; int error = 0, vpid; /* * First initialize some variables */ /* * If an authorization string is being passed in, get it. */ if ((flag & NFSSVC_GOTAUTH) && (nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT)) == 0) { if (nmp->nm_flag & NFSMNT_HASAUTH) panic("cld kerb"); if ((flag & NFSSVC_AUTHINFAIL) == 0) { if (ncd->ncd_authlen <= nmp->nm_authlen && ncd->ncd_verflen <= nmp->nm_verflen && !copyin(ncd->ncd_authstr,nmp->nm_authstr,ncd->ncd_authlen)&& !copyin(ncd->ncd_verfstr,nmp->nm_verfstr,ncd->ncd_verflen)){ nmp->nm_authtype = ncd->ncd_authtype; nmp->nm_authlen = ncd->ncd_authlen; nmp->nm_verflen = ncd->ncd_verflen; #ifdef NFSKERB nmp->nm_key = ncd->ncd_key; #endif } else nmp->nm_flag |= NFSMNT_AUTHERR; } else nmp->nm_flag |= NFSMNT_AUTHERR; nmp->nm_flag |= NFSMNT_HASAUTH; wakeup((caddr_t)&nmp->nm_authlen); } else nmp->nm_flag |= NFSMNT_WAITAUTH; /* * Loop every second updating queue until there is a termination sig. */ while ((nmp->nm_flag & NFSMNT_DISMNT) == 0) { if (nmp->nm_flag & NFSMNT_NQNFS) { /* * If there are no outstanding requests (and therefore no * processes in nfs_reply) and there is data in the receive * queue, poke for callbacks. */ if (nfs_reqq.tqh_first == 0 && nmp->nm_so && nmp->nm_so->so_rcv.sb_cc > 0) { myrep.r_flags = R_GETONEREP; myrep.r_nmp = nmp; myrep.r_mrep = (struct mbuf *)0; myrep.r_procp = (struct proc *)0; (void) nfs_reply(&myrep); } /* * Loop through the leases, updating as required. */ np = nmp->nm_timerhead.cqh_first; while (np != (void *)&nmp->nm_timerhead && (nmp->nm_flag & NFSMNT_DISMINPROG) == 0) { vp = NFSTOV(np); vpid = vp->v_id; if (np->n_expiry < time.tv_sec) { if (vget(vp, 1) == 0) { nmp->nm_inprog = vp; if (vpid == vp->v_id) { CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); np->n_timer.cqe_next = 0; if ((np->n_flag & (NMODIFIED | NQNFSEVICTED)) && vp->v_type == VREG) { if (np->n_flag & NQNFSEVICTED) { (void) nfs_vinvalbuf(vp, V_SAVE, cred, p, 0); np->n_flag &= ~NQNFSEVICTED; (void) nqnfs_vacated(vp, cred); } else { (void) VOP_FSYNC(vp, cred, MNT_WAIT, p); np->n_flag &= ~NMODIFIED; } } } vrele(vp); nmp->nm_inprog = NULLVP; } } else if ((np->n_expiry - NQ_RENEWAL) < time.tv_sec) { if ((np->n_flag & (NQNFSWRITE | NQNFSNONCACHE)) == NQNFSWRITE && vp->v_dirtyblkhd.lh_first && vget(vp, 1) == 0) { nmp->nm_inprog = vp; if (vpid == vp->v_id && nqnfs_getlease(vp, ND_WRITE, cred, p)==0) np->n_brev = np->n_lrev; vrele(vp); nmp->nm_inprog = NULLVP; } } else break; if (np == nmp->nm_timerhead.cqh_first) break; np = nmp->nm_timerhead.cqh_first; } } /* * Get an authorization string, if required. */ if ((nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_DISMNT | NFSMNT_HASAUTH)) == 0) { ncd->ncd_authuid = nmp->nm_authuid; if (copyout((caddr_t)ncd, argp, sizeof (struct nfsd_cargs))) nmp->nm_flag |= NFSMNT_WAITAUTH; else return (ENEEDAUTH); } /* * Wait a bit (no pun) and do it again. 
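 * The daemon sleeps on nm_authstr for hz/3 at a time so that both
 * lease expiry and "kerb" authorization requests are picked up
 * promptly; an EINTR or ERESTART return from the tsleep() is the
 * termination signal and leads to the dounmount() below.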
*/ if ((nmp->nm_flag & NFSMNT_DISMNT) == 0 && (nmp->nm_flag & (NFSMNT_WAITAUTH | NFSMNT_HASAUTH))) { error = tsleep((caddr_t)&nmp->nm_authstr, PSOCK | PCATCH, "nqnfstimr", hz / 3); if (error == EINTR || error == ERESTART) (void) dounmount(nmp->nm_mountp, 0, p); } } /* * Finally, we can free up the mount structure. */ for (nuidp = nmp->nm_uidlruhead.tqh_first; nuidp != 0; nuidp = nnuidp) { nnuidp = nuidp->nu_lru.tqe_next; LIST_REMOVE(nuidp, nu_hash); TAILQ_REMOVE(&nmp->nm_uidlruhead, nuidp, nu_lru); free((caddr_t)nuidp, M_NFSUID); } free((caddr_t)nmp, M_NFSMNT); if (error == EWOULDBLOCK) error = 0; return (error); } #endif /* NFS_NOSERVER */ /* * Adjust all timer queue expiry times when the time of day clock is changed. * Called from the settimeofday() syscall. */ void nqnfs_lease_updatetime(deltat) register int deltat; { register struct nqlease *lp; register struct nfsnode *np; struct mount *mp; struct nfsmount *nmp; int s; if (nqnfsstarttime != 0) nqnfsstarttime += deltat; s = splsoftclock(); for (lp = nqtimerhead.cqh_first; lp != (void *)&nqtimerhead; lp = lp->lc_timer.cqe_next) lp->lc_expiry += deltat; splx(s); /* * Search the mount list for all nqnfs mounts and do their timer * queues. */ for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { #ifdef __NetBSD__ if (!strcmp(&mp->mnt_stat.f_fstypename[0], MOUNT_NFS)) { #else if (mp->mnt_stat.f_fsid.val[1] == MOUNT_NFS) { #endif nmp = VFSTONFS(mp); if (nmp->nm_flag & NFSMNT_NQNFS) { for (np = nmp->nm_timerhead.cqh_first; np != (void *)&nmp->nm_timerhead; np = np->n_timer.cqe_next) { np->n_expiry += deltat; } } } } } /* * Lock a server lease. */ static void nqsrv_locklease(lp) struct nqlease *lp; { while (lp->lc_flag & LC_LOCKED) { lp->lc_flag |= LC_WANTED; (void) tsleep((caddr_t)lp, PSOCK, "nqlc", 0); } lp->lc_flag |= LC_LOCKED; lp->lc_flag &= ~LC_WANTED; } /* * Unlock a server lease. */ static void nqsrv_unlocklease(lp) struct nqlease *lp; { lp->lc_flag &= ~LC_LOCKED; if (lp->lc_flag & LC_WANTED) wakeup((caddr_t)lp); } /* * Update a client lease. */ void nqnfs_clientlease(nmp, np, rwflag, cachable, expiry, frev) register struct nfsmount *nmp; register struct nfsnode *np; int rwflag, cachable; time_t expiry; u_quad_t frev; { register struct nfsnode *tp; if (np->n_timer.cqe_next != 0) { CIRCLEQ_REMOVE(&nmp->nm_timerhead, np, n_timer); if (rwflag == ND_WRITE) np->n_flag |= NQNFSWRITE; } else if (rwflag == ND_READ) np->n_flag &= ~NQNFSWRITE; else np->n_flag |= NQNFSWRITE; if (cachable) np->n_flag &= ~NQNFSNONCACHE; else np->n_flag |= NQNFSNONCACHE; np->n_expiry = expiry; np->n_lrev = frev; tp = nmp->nm_timerhead.cqh_last; while (tp != (void *)&nmp->nm_timerhead && tp->n_expiry > np->n_expiry) tp = tp->n_timer.cqe_prev; if (tp == (void *)&nmp->nm_timerhead) { CIRCLEQ_INSERT_HEAD(&nmp->nm_timerhead, np, n_timer); } else { CIRCLEQ_INSERT_AFTER(&nmp->nm_timerhead, tp, np, n_timer); } } Index: head/sys/nfs/nfs_serv.c =================================================================== --- head/sys/nfs/nfs_serv.c (revision 17760) +++ head/sys/nfs/nfs_serv.c (revision 17761) @@ -1,3437 +1,3417 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_serv.c,v 1.29 1996/04/30 23:23:07 bde Exp $ + * $Id: nfs_serv.c,v 1.30 1996/06/08 12:16:26 bde Exp $ */ /* * nfs version 2 and 3 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) * 3 - build the rpc reply in an mbuf list * nb: * - do not mix the phases, since the nfsm_?? macros can return failures * on a bad rpc or similar and do not do any vrele() or vput()'s * * - the nfsm_reply() macro generates an nfs rpc reply with the nfs * error number iff error != 0 whereas * returning an error from the server function implies a fatal error * such as a badly constructed rpc request that should be dropped without * a reply. * For Version 3, nfsm_reply() does not return for the error case, since * most version 3 rpcs return more than the status for error cases. 
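 *
 * A service routine therefore typically has the shape (an illustrative
 * sketch only, not any particular rpc; the macros are those used
 * throughout this file):
 *
 *	nfsm_srvmtofh(fhp);				(phase 1)
 *	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
 *	error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam,
 *	    &rdonly, (nfsd->nd_flag & ND_KERBAUTH));	(phase 2)
 *	...vnode ops...
 *	vput(vp);
 *	nfsm_reply(siz);				(phase 3)
 *	nfsm_build(tl, u_long *, NFSX_UNSIGNED);
 *	nfsm_srvdone;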
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; #ifndef NFS_NOSERVER nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; /* Global vars */ extern u_long nfs_xdrneg1; extern u_long nfs_false, nfs_true; extern enum vtype nv3tov_type[8]; extern struct nfsstats nfsstats; int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000; int nfs_async; SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, struct proc *)); static void nfsrvw_coalesce __P((struct nfsrv_descript *, struct nfsrv_descript *)); /* * nfs v3 access service */ int nfsrv3_access(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret; char *cp2; struct mbuf *mb, *mreq, *mb2; struct vattr vattr, *vap = &vattr; u_long testmode, nfsmode; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } nfsmode = fxdr_unsigned(u_long, *tl); if ((nfsmode & NFSV3ACCESS_READ) && nfsrv_access(vp, VREAD, cred, rdonly, procp)) nfsmode &= ~NFSV3ACCESS_READ; if (vp->v_type == VDIR) testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); else testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if ((nfsmode & testmode) && nfsrv_access(vp, VWRITE, cred, rdonly, procp)) nfsmode &= ~testmode; if (vp->v_type == VDIR) testmode = NFSV3ACCESS_LOOKUP; else testmode = NFSV3ACCESS_EXECUTE; if ((nfsmode & testmode) && nfsrv_access(vp, VEXEC, cred, rdonly, procp)) nfsmode &= ~testmode; getret = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, vap); nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); nfsm_srvdone; } /* * nfs getattr service */ int nfsrv_getattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va; register struct vattr *vap = &va; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(0); return (0); } nqsrv_getl(vp, ND_READ); error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); if (error) return (0); nfsm_build(fp, struct nfs_fattr *, 
NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); nfsm_srvfillattr(vap, fp); nfsm_srvdone; } /* * nfs setattr service */ int nfsrv_setattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, preat; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register struct nfs_fattr *fp; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; struct timespec guard; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Nah nah nah nah na nah * There is a bug in the Sun client that puts 0xffff in the mode * field of sattr when it should put in 0xffffffff. The u_short * doesn't sign extend. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) vap->va_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != nfs_xdrneg1) vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != nfs_xdrneg1) vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != nfs_xdrneg1) vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); #else vap->va_atime.ts_sec = fxdr_unsigned(long, sp->sa_atime.nfsv2_sec); vap->va_atime.ts_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); } /* * Now that we have all the fields, let's do it. */ if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } nqsrv_getl(vp, ND_WRITE); if (v3) { error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); if (!error && gcheck && (preat.va_ctime.ts_sec != guard.ts_sec || preat.va_ctime.ts_nsec != guard.ts_nsec)) error = NFSERR_NOT_SYNC; if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } } /* * If the size is being changed write access is required, otherwise * just check for a read only file system.
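 * (A va_size that is still (u_quad_t)-1 here is the VATTR_NULL/VNOVAL
 * sentinel, i.e. the size is not being changed by this request.)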
*/ if (vap->va_size == ((u_quad_t)((quad_t) -1))) { if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto out; } } else { if (vp->v_type == VDIR) { error = EISDIR; goto out; } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly, procp)) goto out; } error = VOP_SETATTR(vp, vap, cred, procp); postat_ret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = postat_ret; out: - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_WCCORFATTR(v3)); if (v3) { nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs lookup rpc */ int nfsrv_lookup(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct nameidata nd; struct vnode *vp, *dirp; nfsfh_t nfh; fhandle_t *fhp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirattr_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vattr va, dirattr, *vap = &va; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } if (error) { nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nqsrv_getl(nd.ni_startdir, ND_READ); - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); if (error) { nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nfsm_srvfhtom(fhp, v3); if (v3) { nfsm_srvpostop_attr(0, vap); nfsm_srvpostop_attr(dirattr_ret, &dirattr); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs readlink service */ int nfsrv_readlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; register struct iovec *ivp = iv; register struct mbuf *mp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, i, tlen, len, getret; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; struct vnode *vp; struct vattr attr; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; u_quad_t frev; #ifndef nolint mp2 = mp3 = (struct mbuf *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); len = 0; i = 0; while (len < NFS_MAXPATHLEN) { MGET(mp, M_WAIT, MT_DATA); MCLGET(mp, M_WAIT); mp->m_len = NFSMSIZ(mp); if (len == 0) mp3 = mp2 = mp; else { mp2->m_next = mp; mp2 = mp; } if ((len+mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = 
NFS_MAXPATHLEN-len; len = NFS_MAXPATHLEN; } else len += mp->m_len; ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { m_freem(mp3); nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VLNK) { if (v3) error = EINVAL; else error = ENXIO; goto out; } nqsrv_getl(vp, ND_READ); error = VOP_READLINK(vp, uiop, cred); out: getret = VOP_GETATTR(vp, &attr, cred, procp); - nfsrv_vput(vp); + vput(vp); if (error) m_freem(mp3); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &attr); if (error) return (0); } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = nfsm_rndup(len); nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); mb->m_next = mp3; nfsm_srvdone; } /* * nfs read service */ int nfsrv_read(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *iv; struct iovec *iv2; register struct mbuf *m; register struct nfs_fattr *fp; register u_long *tl; register long t1; register int i; caddr_t bpos; int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; char *cp2; struct mbuf *mb, *mb2, *mreq; struct mbuf *m2; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; struct vattr va, *vap = &va; off_t off; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *tl); } nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_READ); if (error = nfsrv_access(vp, VREAD, cred, rdonly, procp)) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } getret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = getret; if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } if (off >= vap->va_size) cnt = 0; else if ((off + reqlen) > vap->va_size) cnt = nfsm_rndup(vap->va_size - off); else cnt = reqlen; nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); if (v3) { nfsm_build(tl, u_long *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; tl += (NFSX_V3FATTR / sizeof (u_long)); } else { nfsm_build(tl, u_long *, NFSX_V2FATTR + NFSX_UNSIGNED); fp = (struct nfs_fattr *)tl; tl += (NFSX_V2FATTR / sizeof (u_long)); } len = left = cnt; if (cnt > 0) { /* * Generate the mbuf list with the uio_iov ref. to it. 
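 * Two passes are made over the chain: the first adds clusters until
 * cnt bytes of trailing space exist and counts the iovecs needed, the
 * second fills in the iovec array in step with the mbufs.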
*/ i = 0; m = m2 = mb; while (left > 0) { siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { left -= siz; i++; } if (left > 0) { MGET(m, M_WAIT, MT_DATA); MCLGET(m, M_WAIT); m->m_len = 0; m2->m_next = m; m2 = m; } } MALLOC(iv, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = mb; left = cnt; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = cnt; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); off = uiop->uio_offset; FREE((caddr_t)iv2, M_TEMP); if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { if (!error) error = getret; m_freem(mreq); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } } else uiop->uio_resid = 0; - nfsrv_vput(vp); + vput(vp); nfsm_srvfillattr(vap, fp); len -= uiop->uio_resid; tlen = nfsm_rndup(len); if (cnt != tlen || tlen != len) nfsm_adj(mb, cnt - tlen, tlen - len); if (v3) { *tl++ = txdr_unsigned(len); if (len < reqlen) *tl++ = nfs_true; else *tl++ = nfs_false; } *tl = txdr_unsigned(len); nfsm_srvdone; } /* * nfs write service */ int nfsrv_write(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *ivp; register int i, cnt; register struct mbuf *mp; register struct nfs_fattr *fp; struct iovec *iv; struct vattr va, forat; register struct vattr *vap = &va; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, retlen, zeroing, adjust; int stable = NFSV3WRITE_FILESYNC; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; off_t off; u_quad_t frev; if (mrep == NULL) { *mrq = NULL; return (0); } fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); tl += 3; stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) stable = NFSV3WRITE_UNSTABLE; } retlen = len = fxdr_unsigned(long, *tl); cnt = i = 0; /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. 
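 * The loop below trims the rpc header out of the mbuf chain (mbufs
 * before md are zero-lengthed and md itself is advanced to dpos) and
 * clips any trailing garbage past len, leaving only the write data
 * itself for the uio.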
*/ if (len > 0) { zeroing = 1; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else if (mp->m_len > 0) { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } if (mp->m_len > 0) cnt++; } mp = mp->m_next; } } if (len > NFS_MAXDATA || len < 0 || i < len) { error = EIO; nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (len > 0) { MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } /* * XXX * The IO_METASYNC flag indicates that all metadata (and not just * enough to ensure data integrity) must be written to stable storage * synchronously. * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) */ if (stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_resid = len; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = off; error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; FREE((caddr_t)iv, M_TEMP); } aftat_ret = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); if (!error) error = aftat_ret; nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); if (error) return (0); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); if (stable == NFSV3WRITE_UNSTABLE) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * NFS write service with write gathering support. Called when * nfsrvw_procrastinate > 0. * See: Chet Juszczak, "Improving the Write Performance of an NFS Server", * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco, * Jan. 1994.
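 *
 * As a worked example, two gathered writes for the same fh and
 * credentials covering [0, 8192) and [4096, 16384) coalesce into one
 * [0, 16384) VOP_WRITE(): nfsrvw_coalesce() m_adj()'s the 4096
 * overlapping bytes off the front of the second request and appends
 * its mbufs to the first.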
*/ int nfsrv_writegather(ndp, slp, procp, mrq) struct nfsrv_descript **ndp; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { register struct iovec *ivp; register struct mbuf *mp; register struct nfsrv_descript *wp, *nfsd, *owp, *swp; register struct nfs_fattr *fp; register int i; struct iovec *iov; struct nfsrvw_delayhash *wpp; struct ucred *cred; struct vattr va, forat; register u_long *tl; register long t1; caddr_t bpos, dpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; char *cp2; struct mbuf *mb, *mb2, *mreq, *mrep, *md; struct vnode *vp; struct uio io, *uiop = &io; u_quad_t frev, cur_usec; #ifndef nolint i = 0; len = 0; #endif *mrq = NULL; if (*ndp) { nfsd = *ndp; *ndp = NULL; mrep = nfsd->nd_mrep; md = nfsd->nd_md; dpos = nfsd->nd_dpos; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); LIST_INIT(&nfsd->nd_coalesce); nfsd->nd_mreq = NULL; nfsd->nd_stable = NFSV3WRITE_FILESYNC; cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec; nfsd->nd_time = cur_usec + nfsrvw_procrastinate; /* * Now, get the write header.. */ nfsm_srvmtofh(&nfsd->nd_fh); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &nfsd->nd_off); tl += 3; nfsd->nd_stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); nfsd->nd_off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) nfsd->nd_stable = NFSV3WRITE_UNSTABLE; } len = fxdr_unsigned(long, *tl); nfsd->nd_len = len; nfsd->nd_eoff = nfsd->nd_off + len; /* * Trim the header out of the mbuf list and trim off any trailing * junk so that the mbuf list has only the write data. */ zeroing = 1; i = 0; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } } mp = mp->m_next; } if (len > NFS_MAXDATA || len < 0 || i < len) { nfsmout: m_freem(mrep); error = EIO; nfsm_writereply(2 * NFSX_UNSIGNED, v3); if (v3) nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsd->nd_mreq = mreq; nfsd->nd_mrep = NULL; nfsd->nd_time = 0; } /* * Add this entry to the hash and time queues. */ s = splsoftclock(); owp = NULL; wp = slp->ns_tq.lh_first; while (wp && wp->nd_time < nfsd->nd_time) { owp = wp; wp = wp->nd_tq.le_next; } if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_tq); } else { LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } if (nfsd->nd_mrep) { wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); owp = NULL; wp = wpp->lh_first; while (wp && bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } while (wp && wp->nd_off < nfsd->nd_off && !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_hash); /* * Search the hash list for overlapping entries and * coalesce. */ for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) { wp = nfsd->nd_hash.le_next; if (NFSW_SAMECRED(owp, nfsd)) nfsrvw_coalesce(owp, nfsd); } } else { LIST_INSERT_HEAD(wpp, nfsd, nd_hash); } } splx(s); } /* * Now, do VOP_WRITE()s for any one(s) that need to be done now * and generate the associated reply mbuf list(s). 
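 * The time queue (ns_tq) is kept sorted by nd_time, so the scan below
 * stops at the first entry whose deadline has not yet arrived; entries
 * that already carry a reply in nd_mreq are skipped here and picked up
 * by the reply search at the end.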
*/ loop1: cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec; s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { owp = nfsd->nd_tq.le_next; if (nfsd->nd_time > cur_usec) break; if (nfsd->nd_mreq) continue; LIST_REMOVE(nfsd, nd_tq); LIST_REMOVE(nfsd, nd_hash); splx(s); mrep = nfsd->nd_mrep; nfsd->nd_mrep = NULL; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); forat_ret = aftat_ret = 1; error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH)); if (!error) { if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } } else vp = NULL; if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = nfsd->nd_off; uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; if (uiop->uio_resid > 0) { mp = mrep; i = 0; while (mp) { if (mp->m_len > 0) i++; mp = mp->m_next; } uiop->uio_iovcnt = i; MALLOC(iov, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = ivp = iov; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } if (!error) { error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; } FREE((caddr_t)iov, M_TEMP); } m_freem(mrep); if (vp) { aftat_ret = VOP_GETATTR(vp, &va, cred, procp); - nfsrv_vput(vp); + vput(vp); } /* * Loop around generating replies for all write rpcs that have * now been completed. */ swp = nfsd; do { if (error) { nfsm_writereply(NFSX_WCCDATA(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); } } else { nfsm_writereply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsd->nd_len); *tl++ = txdr_unsigned(swp->nd_stable); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(&va, fp); } } nfsd->nd_mreq = mreq; if (nfsd->nd_mrep) panic("nfsrv_write: nd_mrep not free"); /* * Done. Put it at the head of the timer queue so that * the final phase can return the reply. */ s = splsoftclock(); if (nfsd != swp) { nfsd->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } nfsd = swp->nd_coalesce.lh_first; if (nfsd) { LIST_REMOVE(nfsd, nd_tq); } splx(s); } while (nfsd); s = splsoftclock(); swp->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); splx(s); goto loop1; } splx(s); /* * Search for a reply to return. */ s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) if (nfsd->nd_mreq) { LIST_REMOVE(nfsd, nd_tq); *mrq = nfsd->nd_mreq; *ndp = nfsd; break; } splx(s); return (0); } /* * Coalesce the write request nfsd into owp. 
To do this we must: * - remove nfsd from the queues * - merge nfsd->nd_mrep into owp->nd_mrep * - update the nd_eoff and nd_stable for owp * - put nfsd on owp's nd_coalesce list * NB: Must be called at splsoftclock(). */ static void nfsrvw_coalesce(owp, nfsd) register struct nfsrv_descript *owp; register struct nfsrv_descript *nfsd; { register int overlap; register struct mbuf *mp; LIST_REMOVE(nfsd, nd_hash); LIST_REMOVE(nfsd, nd_tq); if (owp->nd_eoff < nfsd->nd_eoff) { overlap = owp->nd_eoff - nfsd->nd_off; if (overlap < 0) panic("nfsrv_coalesce: bad off"); if (overlap > 0) m_adj(nfsd->nd_mrep, overlap); mp = owp->nd_mrep; while (mp->m_next) mp = mp->m_next; mp->m_next = nfsd->nd_mrep; owp->nd_eoff = nfsd->nd_eoff; } else m_freem(nfsd->nd_mrep); nfsd->nd_mrep = NULL; if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) owp->nd_stable = NFSV3WRITE_FILESYNC; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC && owp->nd_stable == NFSV3WRITE_UNSTABLE) owp->nd_stable = NFSV3WRITE_DATASYNC; LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. */ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } /* * nfs create service * now does a truncate to 0 length via. 
setattr if it already exists */ int nfsrv_create(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register u_long *tl; struct nameidata nd; register caddr_t cp; register long t1; caddr_t bpos; int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev, tempsize; u_char cverf[NFSX_V3CREATEVERF]; #ifndef nolint rdev = 0; #endif nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSV3CREATE_GUARDED: if (nd.ni_vp) { error = EEXIST; break; } case NFSV3CREATE_UNCHECKED: nfsm_srvsattr(vap); break; case NFSV3CREATE_EXCLUSIVE: nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); bcopy(cp, cverf, NFSX_V3CREATEVERF); exclusive_flag = 1; if (nd.ni_vp == NULL) vap->va_mode = 0; break; }; vap->va_type = VREG; } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); if (vap->va_type == VNON) vap->va_type = VREG; vap->va_mode = nfstov_mode(sp->sa_mode); switch (vap->va_type) { case VREG: tsize = fxdr_unsigned(long, sp->sa_size); if (tsize != -1) vap->va_size = (u_quad_t)tsize; break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(long, sp->sa_size); break; }; } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ?? 
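* (For V3, the how value parsed above selects the semantics: UNCHECKED * takes the supplied attributes as given, GUARDED fails with EEXIST when * the name already exists, and EXCLUSIVE saves the 8 byte create verifier, * which is compared against va_atime once the create has been done.)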
*/ if (nd.ni_vp == NULL) { if (vap->va_type == VREG || vap->va_type == VSOCK) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (!error) { - nfsrv_vmio(nd.ni_vp); + nfsrv_object_create(nd.ni_vp); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (exclusive_flag) { exclusive_flag = 0; VATTR_NULL(vap); bcopy(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF); error = VOP_SETATTR(nd.ni_vp, vap, cred, procp); } } } else if (vap->va_type == VCHR || vap->va_type == VBLK || vap->va_type == VFIFO) { if (vap->va_type == VCHR && rdev == 0xffffffff) vap->va_type = VFIFO; if (error = suser(cred, (u_short *)0)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); nfsm_reply(0); return (error); } else vap->va_rdev = (dev_t)rdev; nqsrv_getl(nd.ni_dvp, ND_WRITE); if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); nfsm_reply(0); } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; if (error = lookup(&nd)) { free(nd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(0); } - nfsrv_vmio(nd.ni_vp); + nfsrv_object_create(nd.ni_vp); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (nd.ni_cnd.cn_flags & ISSYMLINK) { - nfsrv_vrele(nd.ni_dvp); - nfsrv_vput(nd.ni_vp); + vrele(nd.ni_dvp); + vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; nfsm_reply(0); } } else { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); error = ENXIO; } vp = nd.ni_vp; } else { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; if (nd.ni_dvp == vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (vap->va_size != -1) { error = nfsrv_access(vp, VWRITE, cred, (nd.ni_cnd.cn_flags & RDONLY), procp); if (!error) { nqsrv_getl(vp, ND_WRITE); tempsize = vap->va_size; VATTR_NULL(vap); vap->va_size = tempsize; error = VOP_SETATTR(vp, vap, cred, procp); } if (error) - nfsrv_vput(vp); + vput(vp); } } if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); } if (v3) { if (exclusive_flag && !error && bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) error = EEXIST; diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); if (nd.ni_cnd.cn_nameiop) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vput(nd.ni_vp); + vput(nd.ni_vp); return (error); } 
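/*
 * A minimal sketch, not part of this revision, of the vnode release
 * discipline the bare vput()/vrele() calls above follow: vput() unlocks
 * and then drops a reference on a vnode the caller holds locked, while
 * vrele() only drops a reference on an unlocked vnode.  The function
 * name below is illustrative, not an existing kernel routine.
 */
static void
example_release(lockedvp, unlockedvp)
	struct vnode *lockedvp, *unlockedvp;
{
	vput(lockedvp);		/* VOP_UNLOCK() plus reference drop */
	vrele(unlockedvp);	/* reference drop only */
}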
/* * nfs v3 mknod service */ int nfsrv_mknod(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register u_long *tl; struct nameidata nd; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; u_long major, minor; enum vtype vtyp; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); if (error) { nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vtyp = nfsv3tov_type(*tl); if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); error = NFSERR_BADTYPE; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); goto out; } VATTR_NULL(vap); nfsm_srvsattr(vap); if (vtyp == VCHR || vtyp == VBLK) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_long, *tl++); minor = fxdr_unsigned(u_long, *tl); vap->va_rdev = makedev(major, minor); } /* * Iff doesn't exist, create it. 
*/ if (nd.ni_vp) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); error = EEXIST; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); goto out; } vap->va_type = vtyp; if (vtyp == VSOCK) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (!error) FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); } else { if (error = suser(cred, (u_short *)0)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); goto out; } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = procp->p_ucred; error = lookup(&nd); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (error) goto out; if (nd.ni_cnd.cn_flags & ISSYMLINK) { - nfsrv_vrele(nd.ni_dvp); - nfsrv_vput(nd.ni_vp); + vrele(nd.ni_dvp); + vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; } } out: vp = nd.ni_vp; if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); } diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); nfsmout: if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); if (nd.ni_cnd.cn_nameiop) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vput(nd.ni_vp); + vput(nd.ni_vp); return (error); } /* * nfs remove service */ int nfsrv_remove(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint vp = (struct vnode *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else - nfsrv_vrele(dirp); + vrele(dirp); } if (!error) { vp = nd.ni_vp; if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0))) goto out; /* * The root of a mounted filesystem cannot be deleted. 
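* (The VROOT flag tested below is set on the vnode that is the root of * each mounted filesystem.)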
*/ if (vp->v_flag & VROOT) { error = EBUSY; goto out; } - vnode_pager_uncache(vp); out: if (!error) { - int deallocobj = 0; + vnode_pager_uncache(vp); nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); - if ((vp->v_flag & VVMIO) && vp->v_object) - deallocobj = 1; error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - if (error == 0 && deallocobj) - vm_object_deallocate(vp->v_object); + } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vput(vp); + vput(nd.ni_dvp); + vput(vp); } } if (dirp && v3) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs rename service */ int nfsrv_rename(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct nameidata fromnd, tond; struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; struct vnode *tdirp = (struct vnode *)0; struct vattr fdirfor, fdiraft, tdirfor, tdiraft; nfsfh_t fnfh, tnfh; fhandle_t *ffhp, *tfhp; u_quad_t frev; uid_t saved_uid; #ifndef nolint fvp = (struct vnode *)0; #endif ffhp = &fnfh.fh_generic; tfhp = &tnfh.fh_generic; fromnd.ni_cnd.cn_nameiop = 0; tond.ni_cnd.cn_nameiop = 0; nfsm_srvmtofh(ffhp); nfsm_srvnamesiz(len); /* * Remember our original uid so that we can reset cr_uid before * the second nfs_namei() call, in case it is remapped. 
*/
*/ saved_uid = cred->cr_uid; fromnd.ni_cnd.cn_cred = cred; fromnd.ni_cnd.cn_nameiop = DELETE; fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (fdirp) { if (v3) fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, procp); else { - nfsrv_vrele(fdirp); + vrele(fdirp); fdirp = (struct vnode *)0; } } if (error) { nfsm_reply(2 * NFSX_WCCDATA(v3)); nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); if (fdirp) - nfsrv_vrele(fdirp); + vrele(fdirp); return (0); } fvp = fromnd.ni_vp; nfsm_srvmtofh(tfhp); nfsm_strsiz(len2, NFS_MAXNAMLEN); cred->cr_uid = saved_uid; tond.ni_cnd.cn_cred = cred; tond.ni_cnd.cn_nameiop = RENAME; tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (tdirp) { if (v3) tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, procp); else { - nfsrv_vrele(tdirp); + vrele(tdirp); tdirp = (struct vnode *)0; } } if (error) { VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - nfsrv_vrele(fromnd.ni_dvp); - nfsrv_vrele(fvp); + vrele(fromnd.ni_dvp); + vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { if (v3) error = EEXIST; else error = EISDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { if (v3) error = EEXIST; else error = ENOTDIR; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp->v_mount != tdvp->v_mount) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp == tdvp) if (v3) error = EINVAL; else error = ENOTEMPTY; /* * If source is the same as the destination (that is the * same vnode with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { - int deallocobjfrom = 0, deallocobjto = 0; nqsrv_getl(fromnd.ni_dvp, ND_WRITE); nqsrv_getl(tdvp, ND_WRITE); if (tvp) { nqsrv_getl(tvp, ND_WRITE); - if ((tvp->v_flag & VVMIO) && tvp->v_object) - deallocobjto = 1; (void) vnode_pager_uncache(tvp); } - if ((fvp->v_flag & VVMIO) && fvp->v_object) - deallocobjfrom = 1; error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); - if (deallocobjfrom) - vm_object_deallocate(fvp->v_object); - if (deallocobjto) - vm_object_deallocate(tvp->v_object); - } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) - nfsrv_vrele(tdvp); + vrele(tdvp); else - nfsrv_vput(tdvp); + vput(tdvp); if (tvp) - nfsrv_vput(tvp); + vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - nfsrv_vrele(fromnd.ni_dvp); - nfsrv_vrele(fvp); + vrele(fromnd.ni_dvp); + vrele(fvp); if (error == -1) error = 0; } - nfsrv_vrele(tond.ni_startdir); + vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); out1: if (fdirp) { fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); - nfsrv_vrele(fdirp); + vrele(fdirp); } if (tdirp) { tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); - nfsrv_vrele(tdirp); + vrele(tdirp); } - nfsrv_vrele(fromnd.ni_startdir); + vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } return (0); nfsmout: if (fdirp) - nfsrv_vrele(fdirp); + vrele(fdirp); if (tdirp) - nfsrv_vrele(tdirp); + vrele(tdirp); if (tond.ni_cnd.cn_nameiop) { - nfsrv_vrele(tond.ni_startdir); + vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); } if (fromnd.ni_cnd.cn_nameiop) { - nfsrv_vrele(fromnd.ni_startdir); + vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - nfsrv_vrele(fromnd.ni_dvp); - nfsrv_vrele(fvp); + vrele(fromnd.ni_dvp); + vrele(fvp); } return (error); } /* * nfs link service */ int nfsrv_link(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *xp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft, at; nfsfh_t nfh, dnfh; fhandle_t *fhp, *dfhp; u_quad_t frev; fhp = &nfh.fh_generic; dfhp = &dnfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0))) goto out1; nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if 
(dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out1; xp = nd.ni_vp; if (xp != NULL) { error = EEXIST; goto out; } xp = nd.ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; out: if (!error) { nqsrv_getl(vp, ND_WRITE); nqsrv_getl(xp, ND_WRITE); #if defined(__NetBSD__) || defined(__FreeBSD__) error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); #else error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); #endif } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vrele(nd.ni_vp); + vrele(nd.ni_vp); } out1: if (v3) getret = VOP_GETATTR(vp, &at, cred, procp); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } - nfsrv_vrele(vp); + vrele(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs symbolic link service */ int nfsrv_symlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; - struct vnode *ovp; struct nameidata nd; register struct vattr *vap = &va; register u_long *tl; register long t1; struct nfsv2_sattr *sp; char *bpos, *pathcp = (char *)0, *cp2; struct uio io; struct iovec iv; int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); struct mbuf *mb, *mreq, *mb2; struct vnode *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; - int deallocobj = 0; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out; VATTR_NULL(vap); if (v3) nfsm_srvsattr(vap); nfsm_strsiz(len2, NFS_MAXPATHLEN); MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); iv.iov_base = pathcp; iv.iov_len = len2; io.uio_resid = len2; io.uio_offset = 0; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; nfsm_mtouio(&io, len2); if (!v3) { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode); } *(pathcp + len2) = '\0'; if (nd.ni_vp) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vrele(nd.ni_vp); + vput(nd.ni_dvp); + vrele(nd.ni_vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); - if ((ovp = nd.ni_vp) && (ovp->v_flag & VVMIO) && ovp->v_object) - deallocobj = 1; error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); - if (error == 0 && deallocobj) - vm_object_deallocate(ovp->v_object); if (error) - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); else { 
if (v3) { nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART | FOLLOW); nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; error = lookup(&nd); if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(nd.ni_vp, vap, cred, procp); - nfsrv_vput(nd.ni_vp); + vput(nd.ni_vp); } } else - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); } out: if (pathcp) FREE(pathcp, M_TEMP); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } return (0); nfsmout: if (nd.ni_cnd.cn_nameiop) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); } if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vrele(nd.ni_vp); + vrele(nd.ni_vp); if (pathcp) FREE(pathcp, M_TEMP); return (error); } /* * nfs mkdir service */ int nfsrv_mkdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfs_fattr *fp; struct nameidata nd; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vap->va_mode = nfstov_mode(*tl++); } vap->va_type = VDIR; vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vrele(vp); + vput(nd.ni_dvp); + vrele(vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (!error) { vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); } out: if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) 
+ NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vrele(nd.ni_vp); + vrele(nd.ni_vp); return (error); } /* * nfs rmdir service */ int nfsrv_rmdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; struct nameidata nd; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vput(vp); + vput(nd.ni_dvp); + vput(vp); } if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The nfsm_clget macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it only knows that it has encountered eof when the VOP_READDIR() * reads nothing * - as such, one readdir rpc will return eof false although you are there, * and then the next will return eof * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but such records might confuse * clients for other OSes. * NB: It is tempting to set eof to true if the VOP_READDIR() reads less * than requested, but this may not apply to all filesystems. For * example, client NFS does not (although it is never remote mounted * anyhow). * The alternate call nfsrv_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of: just name strings and file ids, the * entire reply rpc, or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia" comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ struct flrep { nfsuint64 fl_off; u_long fl_postopok; u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; u_long fl_fhok; u_long fl_fhsize; u_long fl_nfh[NFSX_V3FH / sizeof (u_long)]; }; int nfsrv_readdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct uio io; struct iovec iv; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies; int v3 = (nfsd->nd_flag & ND_NFSV3); u_quad_t frev, off, toff, verf; u_int *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); toff = fxdr_unsigned(u_quad_t, *tl++); } off = toff; cnt = fxdr_unsigned(int, *tl); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } nqsrv_getl(vp, ND_READ); if (v3) { error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; } if (!error) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); #ifdef __NetBSD__ ncookies = siz / (5 * NFSX_UNSIGNED); /*7 for V3, but it's an est.
so*/ MALLOC(cookies, u_long *, ncookies * sizeof (u_long *), M_TEMP, M_WAITOK); #endif again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; VOP_LOCK(vp); #ifndef __NetBSD__ if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); #else error = VOP_READDIR(vp, &io, cred, &eofflag, cookies, ncookies); #endif off = (off_t)io.uio_offset; if (!cookies && !error) error = NFSERR_PERM; if (v3) { getret = VOP_GETATTR(vp, &at, cred, procp); if (!error) error = getret; } VOP_UNLOCK(vp); if (error) { - nfsrv_vrele(vp); + vrele(vp); free((caddr_t)rbuf, M_TEMP); if (cookies) free((caddr_t)cookies, M_TEMP); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { - nfsrv_vrele(vp); + vrele(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + 2 * NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; } else nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. * If so, go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; #ifdef __FreeBSD__ /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { #else while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { #endif cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); } mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; len += (4 * NFSX_UNSIGNED + nlen + rem); if (v3) len += 2 * NFSX_UNSIGNED; if (len > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry.
*/ nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; if (v3) { nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; } nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp+xfer) > be) tsiz = be-bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; nfsm_clget; /* Finish off the record */ if (v3) { *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; } *tl = txdr_unsigned(*cookiep); bp += NFSX_UNSIGNED; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } - nfsrv_vrele(vp); + vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); nfsm_srvdone; } int nfsrv_readdirplus(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp, *nvp; struct flrep fl; nfsfh_t nfh; fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; struct uio io; struct iovec iv; struct vattr va, at, *vap = &va; struct nfs_fattr *fp; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies; u_quad_t frev, off, toff, verf; u_int *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); off = toff; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; if (!error) { nqsrv_getl(vp, ND_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); #ifdef __NetBSD__ ncookies = siz / (7 * NFSX_UNSIGNED); MALLOC(cookies, u_long *, ncookies * sizeof (u_long *), M_TEMP, M_WAITOK); #endif again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; VOP_LOCK(vp); #ifndef __NetBSD__ if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); #else error = VOP_READDIR(vp, &io, cred, &eofflag, cookies, ncookies); #endif off = 
(u_quad_t)io.uio_offset; getret = VOP_GETATTR(vp, &at, cred, procp); VOP_UNLOCK(vp); if (!cookies && !error) error = NFSERR_PERM; if (!error) error = getret; if (error) { - nfsrv_vrele(vp); + vrele(vp); if (cookies) free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { - nfsrv_vrele(vp); + vrele(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. * If so, go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; #ifdef __FreeBSD__ /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { #else while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { #endif cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } /* * Probe one of the directory entries to see if the filesystem * supports VGET. */ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) { error = NFSERR_NOTSUPP; - nfsrv_vrele(vp); + vrele(vp); free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; nfsm_reply(cnt); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; /* * For readdir_and_lookup get the vnode using * the file number. */ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp)) goto invalid; bzero((caddr_t)nfhp, NFSX_V3FH); nfhp->fh_fsid = nvp->v_mount->mnt_stat.f_fsid; if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { vput(nvp); goto invalid; } if (VOP_GETATTR(nvp, vap, cred, procp)) { vput(nvp); goto invalid; } vput(nvp); /* * If either the dircount or maxcount will be * exceeded, get out now. Both of these lengths * are calculated conservatively, including all * XDR overheads. */ len += (7 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + NFSX_V3POSTOPATTR); dirlen += (6 * NFSX_UNSIGNED + nlen + rem); if (len > cnt || dirlen > fullsiz) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry.
*/ fp = (struct nfs_fattr *)&fl.fl_fattr; nfsm_srvfillattr(vap, fp); fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; fl.fl_off.nfsuquad[0] = 0; fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; /* * Now copy the flrep structure out. */ xfer = sizeof (struct flrep); cp = (caddr_t)&fl; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } - nfsrv_vrele(vp); + vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); nfsm_srvdone; } /* * nfs commit service */ int nfsrv_commit(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr bfor, aft; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev, off; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). 
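* A byte-ranged commit would presumably pass them through, e.g. as a * hypothetical VOP_FSYNC(vp, cred, MNT_WAIT, off, cnt, procp); as it * stands, the whole file is synced and the off/cnt decoded below go unused.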
*/ fxdr_hyper(tl, &off); tl += 2; cnt = fxdr_unsigned(int, *tl); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); return (0); } for_ret = VOP_GETATTR(vp, &bfor, cred, procp); error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); aft_ret = VOP_GETATTR(vp, &aft, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); if (!error) { nfsm_build(tl, u_long *, NFSX_V3WRITEVERF); *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else return (0); nfsm_srvdone; } /* * nfs statfs service */ int nfsrv_statfs(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct statfs *sf; register struct nfs_statfs *sfp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct statfs statfs; u_quad_t frev, tval; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } sf = &statfs; error = VFS_STATFS(vp->v_mount, sf, procp); getret = VOP_GETATTR(vp, &at, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); if (v3) { tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_tbytes); tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_fbytes); tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_abytes); sfp->sf_tfiles.nfsuquad[0] = 0; sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); sfp->sf_ffiles.nfsuquad[0] = 0; sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_afiles.nfsuquad[0] = 0; sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_invarsec = 0; } else { sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); sfp->sf_bsize = txdr_unsigned(sf->f_bsize); sfp->sf_blocks = txdr_unsigned(sf->f_blocks); sfp->sf_bfree = txdr_unsigned(sf->f_bfree); sfp->sf_bavail = txdr_unsigned(sf->f_bavail); } nfsm_srvdone; } /* * nfs fsinfo service */ int nfsrv_fsinfo(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_fsinfo *sip; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, pref; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); 
return (0); } getret = VOP_GETATTR(vp, &at, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); nfsm_srvpostop_attr(getret, &at); nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (slp->ns_so->so_type == SOCK_DGRAM) pref = NFS_MAXDGRAMDATA; else pref = NFS_MAXDATA; sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_rtpref = txdr_unsigned(pref); sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_wtpref = txdr_unsigned(pref); sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_dtpref = txdr_unsigned(pref); sip->fs_maxfilesize.nfsuquad[0] = 0xffffffff; sip->fs_maxfilesize.nfsuquad[1] = 0xffffffff; sip->fs_timedelta.nfsv3_sec = 0; sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1); sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); nfsm_srvdone; } /* * nfs pathconf service */ int nfsrv_pathconf(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_pathconf *pc; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, linkmax, namemax; int chownres, notrunc; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); if (!error) error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); if (!error) error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); if (!error) error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc); getret = VOP_GETATTR(vp, &at, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. 
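* (That is, names are treated as case sensitive and case preserving, * which is exactly what the two pathconf fields are set to below.)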
*/ pc->pc_caseinsensitive = nfs_false; pc->pc_casepreserving = nfs_true; nfsm_srvdone; } /* * Null operation, used by clients to ping server */ /* ARGSUSED */ int nfsrv_null(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error = NFSERR_RETVOID, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif nfsm_reply(0); return (0); } /* * No operation, used for obsolete procedures */ /* ARGSUSED */ int nfsrv_noop(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif if (nfsd->nd_repstat) error = nfsd->nd_repstat; else error = EPROCUNAVAIL; nfsm_reply(0); return (0); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESS() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case * 2 - The owner is to be given access irrespective of mode bits so that * processes that chmod after opening a file don't break. I don't like * this because it opens a security hole, but since the nfs server opens * a security hole the size of a barn door anyhow, what the heck. */ static int nfsrv_access(vp, flags, cred, rdonly, p) register struct vnode *vp; int flags; register struct ucred *cred; int rdonly; struct proc *p; { struct vattr vattr; int error; if (flags & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If there's shared text associated with * the inode, we can't allow writing. */ if (vp->v_flag & VTEXT) return (ETXTBSY); } if (error = VOP_GETATTR(vp, &vattr, cred, p)) return (error); if ((error = VOP_ACCESS(vp, flags, cred, p)) && cred->cr_uid != vattr.va_uid) return (error); return (0); } #endif /* NFS_NOSERVER */ Index: head/sys/nfs/nfs_subs.c =================================================================== --- head/sys/nfs/nfs_subs.c (revision 17760) +++ head/sys/nfs/nfs_subs.c (revision 17761) @@ -1,1992 +1,1949 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.30 1996/06/23 17:19:25 bde Exp $ + * $Id: nfs_subs.c,v 1.31 1996/07/16 10:19:44 dfr Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #ifdef VFS_LKM #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_long nfs_xdrneg1; u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_long nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; #ifndef NFS_NOSERVER /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. 
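* For example, the Version 2 statfs procedure (RPC number 17) maps to the * generic NFSPROC_FSSTAT below, and V2 procedures with no generic * counterpart (root, writecache) map to NFSPROC_NOOP.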
*/ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
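* For example, an EEXIST from the mkdir RPC is listed in nfsv3err_mkdir * below and so passes through unchanged, while the same errno for getattr * is not in nfsv3err_getattr and would be mapped to that table's default, * NFSERR_IO.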
*/ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { 
NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; #ifdef VFS_LKM struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); #endif LIST_HEAD(nfsnodehashhead, nfsnode); /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_long *xidp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; struct timeval tv; static u_long base; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. 
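For reference, the eight 32-bit words reserved next cover the fixed part of an ONC RPC call message (RFC 1057) plus the credential preamble; txdr_unsigned() is simply host-to-network byte-order conversion. A sketch of the layout being built, as a C struct whose field names are descriptive only, not taken from this file:

	#include <stdint.h>

	/* One word per *tl++ store in nfsm_rpchead() below. */
	struct rpc_call_words {
		uint32_t xid;		/* transaction id (nfs_xid) */
		uint32_t mtype;		/* rpc_call: 0 = CALL */
		uint32_t rpcvers;	/* rpc_vers: 2 = RPC_VER2 */
		uint32_t prog;		/* NFS_PROG or NQNFS_PROG */
		uint32_t vers;		/* NFS_VER2, NFS_VER3, NQNFS_VER3 */
		uint32_t proc;		/* procedure number */
		uint32_t cred_flavor;	/* auth_type, e.g. RPCAUTH_UNIX */
		uint32_t cred_len;	/* authsiz: rounded body length */
	};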
*/ nfsm_build(tl, u_long *, 8 * NFSX_UNSIGNED); /* * derive initial xid from system time * XXX time is invalid if root not yet mounted */ if (!base && (rootvp)) { microtime(&tv); base = tv.tv_sec << 12; nfs_xid = base; } /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_long *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... */ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. 
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ? 
mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... */ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; char *cp; long siz; { register struct mbuf *m1 = 0, *m2; long left, xfer, len, tlen; u_long *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_long *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_long *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init() { register int i; /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfsnode) > NFS_NODEALLOC) { printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC); printf("Try reducing NFS_SMALLFH\n"); } if (sizeof (struct nfsmount) > NFS_MNTALLOC) { printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC); printf("Try reducing NFS_MUIDHASHSIZ\n"); } if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) nfs_iodwant[i] = (struct proc *)0; TAILQ_INIT(&nfs_bufq); nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. 
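Before the NQNFS initialization that follows, a note on nfsm_strtmbuf() above: it is writing the standard XDR counted-string encoding, a four-byte big-endian length, the bytes themselves, then zero padding out to a four-byte boundary (which is what nfsm_rndup() computes). The same wire format in a stand-alone user-space form:

	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	#define	XDR_RNDUP(n)	(((n) + 3) & ~3)	/* like nfsm_rndup() */

	/* Encode a counted XDR string; buf needs 4 + XDR_RNDUP(len)
	 * bytes of space.  Returns the number of bytes written. */
	static size_t
	xdr_put_string(unsigned char *buf, const char *s, uint32_t len)
	{
		uint32_t nlen = htonl(len);	/* length word first */

		memcpy(buf, &nlen, 4);
		memcpy(buf + 4, s, len);	/* then the bytes */
		memset(buf + 4 + len, 0, XDR_RNDUP(len) - len);	/* pad */
		return (4 + XDR_RNDUP(len));
	}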
*/ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); #ifdef __FreeBSD__ /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. */ #ifndef NFS_NOSERVER lease_check = nfs_lease_check; #endif lease_updatetime = nfs_lease_updatetime; vfsconf[MOUNT_NFS]->vfc_refcount++; /* make us non-unloadable */ #ifdef VFS_LKM sysent[SYS_nfssvc].sy_narg = 2; sysent[SYS_nfssvc].sy_call = nfssvc; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = 2; sysent[SYS_getfh].sy_call = getfh; #endif #endif #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register struct nfsnodehashhead *nhpp; register long t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if (error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intents to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). * * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introduing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(long, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { /* * If we had a lock and it turns out that the vnode * is an object which we don't want to lock (e.g. 
VDIR) * to avoid nasty hanging problems on a server crash, * then release it here. */ if (vtyp != VREG && VOP_ISLOCKED(vp)) VOP_UNLOCK(vp); vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, (dev_t)rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode. */ LIST_REMOVE(np, n_hash); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; nhpp = NFSNOHASH(nfs_hash(np->n_fhp, np->n_fhsize)); LIST_INSERT_HEAD(nhpp, np, n_hash); *vpp = vp = nvp; } } np->n_mtime = mtime.ts_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = (dev_t)rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); fxdr_hyper(&fp->fa3_size, &vap->va_size); vap->va_blocksize = NFS_FABLKSIZE; fxdr_hyper(&fp->fa3_used, &vap->va_bytes); vap->va_fileid = fxdr_unsigned(int, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_long, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); vap->va_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(long, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.ts_nsec = 0; vap->va_gen = fxdr_unsigned(u_long, fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time.tv_sec; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np = VTONFS(vp); register struct vattr *vap; if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; vap = &np->n_vattr; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a 
lookup() call and do it */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct mbuf *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp; struct vnode *dp; int error, rdonly; struct componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || *fromcp == '/') { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if (error = nfs_adv(mdp, dposp, len, rem)) goto out; } ndp->ni_pathlen = tocp - cnp->cn_pnbuf; cnp->cn_nameptr = cnp->cn_pnbuf; /* * Extract and set starting directory. */ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag)) goto out; if (dp->v_type != VDIR) { - nfsrv_vrele(dp); + vrele(dp); error = ENOTDIR; goto out; } VREF(dp); *retdirp = dp; ndp->ni_startdir = dp; if (rdonly) cnp->cn_flags |= (NOCROSSMOUNT | RDONLY); else cnp->cn_flags |= NOCROSSMOUNT; /* * And call lookup() to do the real work */ cnp->cn_proc = p; if (error = lookup(ndp)) goto out; /* * Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); vput(ndp->ni_vp); ndp->ni_vp = NULL; error = EINVAL; goto out; } - nfsrv_vmio(ndp->ni_vp); + nfsrv_object_create(ndp->ni_vp); /* * Check for saved name request */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; return (0); } out: FREE(cnp->cn_pnbuf, M_NAMEI); return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
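The first of these helpers, nfsm_srvwcc() just below, emits the NFSv3 wcc_data that lets clients validate cached data across a server-side change (RFC 1813): an optional pre-operation triple followed by optional post-operation attributes, each gated by an XDR boolean. The seven words built in its else-branch, sketched as a struct with descriptive (not source) field names:

	#include <stdint.h>

	/* wcc_data's pre_op_attr arm: one boolean, a size hyper, and
	 * two times = the 7 * NFSX_UNSIGNED words nfsm_srvwcc() builds. */
	struct pre_op_attr_words {
		uint32_t attributes_follow;	/* nfs_true / nfs_false */
		uint32_t size[2];		/* file size as an XDR hyper */
		uint32_t mtime[2];		/* seconds, nanoseconds */
		uint32_t ctime[2];		/* seconds, nanoseconds */
	};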
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; if (before_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&(before_vap->va_size), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(&vap->va_size, &fp->fa3_size); txdr_hyper(&vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct mbuf *nam; int *rdonlyp; int kerbflag; { register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; *vpp = (struct vnode *)0; mp = getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); /* * 
Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; - nfsrv_vmio(*vpp); + nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp); return (0); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct mbuf *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = mtod(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = mtod(nam, struct sockaddr_iso *); isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { 0, 0 }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. 
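A note on nfs_getcookie() above: offset 0 always yields the canned null cookie, and every NFS_DIRBLKSIZ bytes of logical directory offset past that consumes one stored cookie, so the chain of nfsdmap blocks is indexed in two steps. The arithmetic in isolation (COOKIES_PER_BLK is a stand-in for NFSNUMCOOKIES, whose real value lives in nfsnode.h):

	#define	COOKIES_PER_BLK	31	/* stand-in for NFSNUMCOOKIES */

	/* Split a directory offset into (block-in-chain, slot-in-block);
	 * callers handle off == 0 (the null cookie) before this point. */
	static void
	cookie_index(long off, int *blk, int *slot)
	{
		int pos = off / 4096 - 1;	/* 4096 = NFS_DIRBLKSIZ */

		*blk = pos / COOKIES_PER_BLK;
		*slot = pos % COOKIES_PER_BLK;
	}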
Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int -nfsrv_vmio(struct vnode *vp) { - vm_object_t object; +nfsrv_object_create(struct vnode *vp) { if ((vp == NULL) || (vp->v_type != VREG)) return 1; - -retry: - if ((vp->v_flag & VVMIO) == 0) { - struct vattr vat; - struct proc *p = curproc; - - if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) - panic("nfsrv_vmio: VOP_GETATTR failed"); - - (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - - vp->v_flag |= VVMIO; - } else { - if ((object = vp->v_object) && - (object->flags & OBJ_DEAD)) { - tsleep(object, PVM, "nfdead", 0); - goto retry; - } - if (!object) - panic("nfsrv_vmio: VMIO object missing"); - vm_object_reference(object); - } - return 0; -} -int -nfsrv_vput(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vput(vp); - vm_object_deallocate(vp->v_object); - } else { - vput(vp); - } - return 0; -} -int -nfsrv_vrele(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vrele(vp); - vm_object_deallocate(vp->v_object); - } else { - vrele(vp); - } - return 0; + return vfs_object_create(vp, curproc, curproc?curproc->p_ucred:NULL, 1); } #endif /* NFS_NOSERVER */ Index: head/sys/nfsclient/nfs.h =================================================================== --- head/sys/nfsclient/nfs.h (revision 17760) +++ head/sys/nfsclient/nfs.h (revision 17761) @@ -1,603 +1,601 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.16 1995/12/17 21:12:05 phk Exp $ + * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ #include /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */ #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFS_GATHERDELAY #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ #endif #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ /* * Oddballs */ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) /* * XXX * sys/buf.h should be edited to change B_APPENDWRITE --> B_NEEDCOMMIT, but * until then... * Same goes for sys/malloc.h, which needs M_NFSDIROFF, * M_NFSRVDESC and M_NFSBIGFH added. * The VA_EXCLUSIVE flag should be added for va_vaflags and set for an * exclusive create. * The B_INVAFTERWRITE flag should be set to whatever is required by the * buffer cache code to say "Invalidate the block after it is written back". 
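Among the macros that follow, NFS_ATTRTIMEO() deserves a close look: the attribute-cache lifetime scales with how long ago the file was last modified (one tenth of its age), clamped to [NFS_MINATTRTIMO, NFS_MAXATTRTIMO], and pinned to the minimum while the file has locally modified data. The same arithmetic as a plain function, using the default 5/60 second bounds:

	/* Sketch of NFS_ATTRTIMEO(): recently changed files get short
	 * cache lifetimes, stable files get long ones. */
	static long
	attr_timeo(long now, long mtime, int modified)
	{
		long t = (now - mtime) / 10;

		if (modified || t < 5)		/* NFS_MINATTRTIMO */
			return (5);
		if (t > 60)			/* NFS_MAXATTRTIMO */
			return (60);
		return (t);
	}

A file last written 200 seconds ago is therefore trusted for 20 seconds; one written 20 minutes ago gets the full 60.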
*/ #ifndef B_NEEDCOMMIT #define B_NEEDCOMMIT B_APPENDWRITE #endif #ifndef M_NFSRVDESC #define M_NFSRVDESC M_TEMP #endif #ifndef M_NFSDIROFF #define M_NFSDIROFF M_TEMP #endif #ifndef M_NFSBIGFH #define M_NFSBIGFH M_TEMP #endif #ifndef VA_EXCLUSIVE #define VA_EXCLUSIVE 0 #endif #ifdef __FreeBSD__ #define B_INVAFTERWRITE B_NOCACHE #else #define B_INVAFTERWRITE B_INVAL #endif /* * These ifdefs try to handle the differences between the various 4.4BSD-Lite * based vfs interfaces. * btw: NetBSD-current does have a VOP_LEASDE(), but I don't know how to * differentiate between NetBSD-1.0 and NetBSD-current, so.. * I also don't know about BSDi's 2.0 release. */ #if !defined(HAS_VOPLEASE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPLEASE 1 #endif #if !defined(HAS_VOPREVOKE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPREVOKE 1 #endif /* * The IO_METASYNC flag should be implemented for local file systems. * (Until then, it is nothin at all.) */ #ifndef IO_METASYNC #define IO_METASYNC 0 #endif /* * Set the attribute timeout based on how recently the file has been modified. */ #define NFS_ATTRTIMEO(np) \ ((((np)->n_flag & NMODIFIED) || \ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ (time.tv_sec - (np)->n_mtime) / 10)) /* * Expected allocation sizes for major data structures. If the actual size * of the structure exceeds these sizes, then malloc() will be allocating * almost twice the memory required. This is used in nfs_init() to warn * the sysadmin that the size of a structure should be reduced. * (These sizes are always a power of 2. If the kernel malloc() changes * to one that does not allocate space in powers of 2 size, then this all * becomes bunk!) */ #define NFS_NODEALLOC 256 #define NFS_MNTALLOC 512 #define NFS_SVCALLOC 256 #define NFS_UIDALLOC 128 /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try and use it. */ struct nfsd_args { int sock; /* Socket to serve */ caddr_t name; /* Client address for connection based sockets */ int namelen; /* Length of name */ }; struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ u_long nsd_haddr; /* Ip address of client */ struct ucred nsd_cr; /* Cred. 
uid maps to */ int nsd_authlen; /* Length of auth string (ret) */ u_char *nsd_authstr; /* Auth string (ret) */ int nsd_verflen; /* and the verfier */ u_char *nsd_verfstr; struct timeval nsd_timestamp; /* timestamp from verifier */ u_long nsd_ttl; /* credential ttl (sec) */ NFSKERBKEY_T nsd_key; /* Session key */ }; struct nfsd_cargs { char *ncd_dirp; /* Mount dir path */ uid_t ncd_authuid; /* Effective uid */ int ncd_authtype; /* Type of authenticator */ int ncd_authlen; /* Length of authenticator string */ u_char *ncd_authstr; /* Authenticator string */ int ncd_verflen; /* and the verifier */ u_char *ncd_verfstr; NFSKERBKEY_T ncd_key; /* Session key */ }; /* * Stats structure */ struct nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFS_NPROCS]; int rpcretries; int srvrpccnt[NFS_NPROCS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvnqnfs_leases; int srvnqnfs_maxleases; int srvnqnfs_getleases; int srvvop_writes; }; /* * Flags for nfssvc() system call. */ #define NFSSVC_BIOD 0x002 #define NFSSVC_NFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_AUTHIN 0x010 #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * The set of signals the interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(KERNEL) || defined(_KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) /* * Socket errors ignored for connectionless sockets?? 
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ int r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ struct proc *r_procp; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_INTR 0x08 /* intr mnt, signal pending */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ /* * A list of nfssvc_sock structures is maintained with all the sockets * that require service by the nfsd. * The nfsuid structs hang off of the nfssvc_sock structs in both lru * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) #ifndef NFS_WDELAYHASHSIZ #define NFS_WDELAYHASHSIZ 16 /* and with this */ #endif #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ #define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) #define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) /* * Network address hash list element */ union nethostaddr { u_long had_inetaddr; struct mbuf *had_nam; }; struct nfsuid { TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ struct ucred nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. timestamp */ u_long nu_nickname; /* Nickname on server */ NFSKERBKEY_T nu_key; /* and session key */ }; #define nu_inetaddr nu_haddr.had_inetaddr #define nu_nam nu_haddr.had_nam /* Bits for nu_flag */ #define NU_INETADDR 0x1 #define NU_NAM 0x2 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ TAILQ_HEAD(, nfsuid) ns_uidlruhead; struct file *ns_fp; struct socket *ns_so; struct mbuf *ns_nam; struct mbuf *ns_raw; struct mbuf *ns_rawend; struct mbuf *ns_rec; struct mbuf *ns_recend; struct mbuf *ns_frag; int ns_flag; int ns_solock; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; }; /* Bits for "ns_flag" */ #define SLP_VALID 0x01 #define SLP_DOREC 0x02 #define SLP_NEEDQ 0x04 #define SLP_DISCONN 0x08 #define SLP_GETSTREAM 0x10 #define SLP_LASTFRAG 0x20 #define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; extern int nfssvc_sockhead_flag; #define SLP_INIT 0x01 #define SLP_WANTINIT 0x02 /* * One of these structures is allocated for each nfsd. */ struct nfsd { TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ int nfsd_flag; /* NFSD_ flags */ struct nfssvc_sock *nfsd_slp; /* Current socket */ int nfsd_authlen; /* Authenticator len */ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; struct proc *nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; /* Bits for "nfsd_flag" */ #define NFSD_WAITING 0x01 #define NFSD_REQINPROG 0x02 #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. */ struct nfsrv_descript { u_quad_t nd_time; /* Write deadline (usec) */ off_t nd_off; /* Start byte offset */ off_t nd_eoff; /* and end byte offset */ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_nam; /* and socket addr */ struct mbuf *nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ int nd_procnum; /* RPC # */ int nd_stable; /* storage type */ int nd_flag; /* nd_flag */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ #define ND_READ LEASE_READ #define ND_WRITE LEASE_WRITE #define ND_CHECK 0x04 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 #define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; extern int nfsd_head_flag; #define NFSD_CHECKSLP 0x01 /* * These macros compare nfsrv_descript structures. 
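The comparisons this comment introduces drive server-side write gathering: NFSW_CONTIG() lets two queued writes coalesce only when they hit the same file handle and the older range reaches the newer one's start, and NFSW_SAMECRED() additionally demands identical credentials. A minimal rendering of the range test, with placeholder handle size:

	#include <string.h>
	#include <sys/types.h>

	struct wr { off_t off, eoff; unsigned char fh[32]; };

	/* Coalescible iff same handle and the [off, eoff) ranges touch
	 * or overlap; mirrors NFSW_CONTIG() below. */
	static int
	writes_contig(const struct wr *o, const struct wr *n)
	{
		return (o->eoff >= n->off &&
		    memcmp(o->fh, n->fh, sizeof(o->fh)) == 0);
	}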
*/ #define NFSW_CONTIG(o, n) \ ((o)->nd_eoff >= (n)->nd_off && \ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ sizeof (struct ucred))) int nfs_reply __P((struct nfsreq *)); int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); int nfs_send __P((struct socket *,struct mbuf *,struct mbuf *,struct nfsreq *)); int nfs_rephead __P((int,struct nfsrv_descript *,struct nfssvc_sock *,int,int,u_quad_t *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_sndlock __P((int *,struct nfsreq *)); void nfs_sndunlock __P((int *flagp)); int nfs_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); int nfs_vinvalbuf __P((struct vnode *,int,struct ucred *,struct proc *,int)); int nfs_readrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_writerpc __P((struct vnode *,struct uio *,struct ucred *,int *,int *)); int nfs_readdirrpc __P((register struct vnode *,struct uio *,struct ucred *)); int nfs_asyncio __P((struct buf *,struct ucred *)); int nfs_doio __P((struct buf *,struct ucred *,struct proc *)); int nfs_readlinkrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_sigintr __P((struct nfsmount *,struct nfsreq *r,struct proc *)); int nfs_readdirplusrpc __P((struct vnode *,register struct uio *,struct ucred *)); int nfsm_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); void nfsm_srvfattr __P((struct nfsrv_descript *,struct vattr *,struct nfs_fattr *)); void nfsm_srvwcc __P((struct nfsrv_descript *,int,struct vattr *,int,struct vattr *,struct mbuf **,char **)); void nfsm_srvpostopattr __P((struct nfsrv_descript *,int,struct vattr *,struct mbuf **,char **)); int netaddr_match __P((int,union nethostaddr *,struct mbuf *)); int nfs_request __P((struct vnode *,struct mbuf *,int,struct proc *,struct ucred *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_loadattrcache __P((struct vnode **,struct mbuf **,caddr_t *,struct vattr *)); int nfs_namei __P((struct nameidata *,fhandle_t *,int,struct nfssvc_sock *,struct mbuf *,struct mbuf **,caddr_t *,struct vnode **,struct proc *,int)); void nfsm_adj __P((struct mbuf *,int,int)); int nfsm_mbuftouio __P((struct mbuf **,struct uio *,int,caddr_t *)); void nfsrv_initcache __P((void)); int nfs_getauth __P((struct nfsmount *,struct nfsreq *,struct ucred *,char **,int *,char *,int *,NFSKERBKEY_T)); int nfs_getnickauth __P((struct nfsmount *,struct ucred *,char **,int *,char *,int)); int nfs_savenickauth __P((struct nfsmount *,struct ucred *,int,NFSKERBKEY_T,struct mbuf **,char **,struct mbuf *)); int nfs_adv __P((struct mbuf **,caddr_t *,int,int)); void nfs_nhinit __P((void)); void nfs_timer __P((void*)); u_long nfs_hash __P((nfsfh_t *,int)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); int nfsrv_dorec __P((struct nfssvc_sock *,struct nfsd *,struct nfsrv_descript **)); void nfsrv_cleancache __P((void)); int nfsrv_getcache __P((struct nfsrv_descript *,struct nfssvc_sock *,struct mbuf **)); int nfs_init __P((void)); void nfsrv_updatecache __P((struct nfsrv_descript *,int,struct mbuf *)); int nfs_connect __P((struct nfsmount *,struct nfsreq *)); void nfs_disconnect __P((struct nfsmount *nmp)); int nfs_getattrcache __P((struct vnode *,struct vattr *)); int nfsm_strtmbuf __P((struct mbuf **,char **,char *,long)); int nfs_bioread __P((struct vnode *,struct uio *,int,struct ucred *)); int nfsm_uiotombuf __P((struct uio *,struct mbuf **,int,caddr_t *)); void nfsrv_init 
__P((int)); void nfs_clearcommit __P((struct mount *)); int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrvw_sort __P((gid_t [],int)); void nfsrv_setcred __P((struct ucred *,struct ucred *)); int nfs_writebp __P((struct buf *,int)); -int nfsrv_vput __P(( struct vnode * )); -int nfsrv_vrele __P(( struct vnode * )); -int nfsrv_vmio __P(( struct vnode * )); +int nfsrv_object_create __P(( struct vnode * )); void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, struct proc *, struct mbuf **)); int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, struct proc *p)); int nfsrv3_access __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_fhtovp __P((fhandle_t *,int,struct vnode **, struct ucred *,struct nfssvc_sock *,struct mbuf *, int *,int)); int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_setattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_statfs __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_symlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); 
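The __P() wrapper on every prototype above is the traditional BSD portability macro from <sys/cdefs.h>: under an ANSI compiler the parameter list passes through intact, under old K&R C it is discarded so the declarations degrade to empty parameter lists. Abridged:

	/* Classic <sys/cdefs.h> idiom of the era. */
	#if defined(__STDC__) || defined(__cplusplus)
	#define	__P(protos)	protos	/* full ANSI prototype */
	#else
	#define	__P(protos)	()	/* K&R: parameter info dropped */
	#endif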
#endif /* KERNEL */ #endif Index: head/sys/nfsclient/nfs_subs.c =================================================================== --- head/sys/nfsclient/nfs_subs.c (revision 17760) +++ head/sys/nfsclient/nfs_subs.c (revision 17761) @@ -1,1992 +1,1949 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.30 1996/06/23 17:19:25 bde Exp $ + * $Id: nfs_subs.c,v 1.31 1996/07/16 10:19:44 dfr Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. 
*/ #include #include #include #include #include #include #include #include #include #include #include #ifdef VFS_LKM #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_long nfs_xdrneg1; u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_long nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; #ifndef NFS_NOSERVER /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, }; /* * Maps errno values to nfs error numbers. 
* Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. */ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, 
NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; #ifdef VFS_LKM struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); #endif LIST_HEAD(nfsnodehashhead, nfsnode); /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. 
*/ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_long *xidp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; struct timeval tv; static u_long base; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. */ nfsm_build(tl, u_long *, 8 * NFSX_UNSIGNED); /* * derive initial xid from system time * XXX time is invalid if root not yet mounted */ if (!base && (rootvp)) { microtime(&tv); base = tv.tv_sec << 12; nfs_xid = base; } /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_long *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... 
*/ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can only handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet..
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... 
*/ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; char *cp; long siz; { register struct mbuf *m1 = 0, *m2; long left, xfer, len, tlen; u_long *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_long *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_long *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init() { register int i; /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfsnode) > NFS_NODEALLOC) { printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC); printf("Try reducing NFS_SMALLFH\n"); } if (sizeof (struct nfsmount) > NFS_MNTALLOC) { printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC); printf("Try reducing NFS_MUIDHASHSIZ\n"); } if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) nfs_iodwant[i] = (struct proc *)0; TAILQ_INIT(&nfs_bufq); nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. */ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); #ifdef __FreeBSD__ /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. 
*/ #ifndef NFS_NOSERVER lease_check = nfs_lease_check; #endif lease_updatetime = nfs_lease_updatetime; vfsconf[MOUNT_NFS]->vfc_refcount++; /* make us non-unloadable */ #ifdef VFS_LKM sysent[SYS_nfssvc].sy_narg = 2; sysent[SYS_nfssvc].sy_call = nfssvc; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = 2; sysent[SYS_getfh].sy_call = getfh; #endif #endif #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register struct nfsnodehashhead *nhpp; register long t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if (error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intends to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). * * NFSv3 cleared up the issue, and requires fa_mode to not * contain any type information (while also introducing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(long, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { /* * If we had a lock and it turns out that the vnode * is an object which we don't want to lock (e.g. VDIR) * to avoid nasty hanging problems on a server crash, * then release it here. */ if (vtyp != VREG && VOP_ISLOCKED(vp)) VOP_UNLOCK(vp); vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, (dev_t)rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode.
*/ LIST_REMOVE(np, n_hash); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; nhpp = NFSNOHASH(nfs_hash(np->n_fhp, np->n_fhsize)); LIST_INSERT_HEAD(nhpp, np, n_hash); *vpp = vp = nvp; } } np->n_mtime = mtime.ts_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = (dev_t)rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); fxdr_hyper(&fp->fa3_size, &vap->va_size); vap->va_blocksize = NFS_FABLKSIZE; fxdr_hyper(&fp->fa3_used, &vap->va_bytes); vap->va_fileid = fxdr_unsigned(int, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_long, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); vap->va_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(long, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.ts_nsec = 0; vap->va_gen = fxdr_unsigned(u_long, fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time.tv_sec; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np = VTONFS(vp); register struct vattr *vap; if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; vap = &np->n_vattr; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a lookup() call and do it */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct mbuf *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp; struct vnode *dp; int error, rdonly; struct 
componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || *fromcp == '/') { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if (error = nfs_adv(mdp, dposp, len, rem)) goto out; } ndp->ni_pathlen = tocp - cnp->cn_pnbuf; cnp->cn_nameptr = cnp->cn_pnbuf; /* * Extract and set starting directory. */ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag)) goto out; if (dp->v_type != VDIR) { - nfsrv_vrele(dp); + vrele(dp); error = ENOTDIR; goto out; } VREF(dp); *retdirp = dp; ndp->ni_startdir = dp; if (rdonly) cnp->cn_flags |= (NOCROSSMOUNT | RDONLY); else cnp->cn_flags |= NOCROSSMOUNT; /* * And call lookup() to do the real work */ cnp->cn_proc = p; if (error = lookup(ndp)) goto out; /* * Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); vput(ndp->ni_vp); ndp->ni_vp = NULL; error = EINVAL; goto out; } - nfsrv_vmio(ndp->ni_vp); + nfsrv_object_create(ndp->ni_vp); /* * Check for saved name request */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; return (0); } out: FREE(cnp->cn_pnbuf, M_NAMEI); return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; if (before_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&(before_vap->va_size), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(&vap->va_size, &fp->fa3_size); txdr_hyper(&vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct mbuf *nam; int *rdonlyp; int kerbflag; { register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; *vpp = (struct vnode *)0; mp = getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); /* * 
Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; - nfsrv_vmio(*vpp); + nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp); return (0); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct mbuf *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = mtod(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = mtod(nam, struct sockaddr_iso *); isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { 0, 0 }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. 
Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int -nfsrv_vmio(struct vnode *vp) { - vm_object_t object; +nfsrv_object_create(struct vnode *vp) { if ((vp == NULL) || (vp->v_type != VREG)) return 1; - -retry: - if ((vp->v_flag & VVMIO) == 0) { - struct vattr vat; - struct proc *p = curproc; - - if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) - panic("nfsrv_vmio: VOP_GETATTR failed"); - - (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - - vp->v_flag |= VVMIO; - } else { - if ((object = vp->v_object) && - (object->flags & OBJ_DEAD)) { - tsleep(object, PVM, "nfdead", 0); - goto retry; - } - if (!object) - panic("nfsrv_vmio: VMIO object missing"); - vm_object_reference(object); - } - return 0; -} -int -nfsrv_vput(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vput(vp); - vm_object_deallocate(vp->v_object); - } else { - vput(vp); - } - return 0; -} -int -nfsrv_vrele(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vrele(vp); - vm_object_deallocate(vp->v_object); - } else { - vrele(vp); - } - return 0; + return vfs_object_create(vp, curproc, curproc?curproc->p_ucred:NULL, 1); } #endif /* NFS_NOSERVER */ Index: head/sys/nfsclient/nfsargs.h =================================================================== --- head/sys/nfsclient/nfsargs.h (revision 17760) +++ head/sys/nfsclient/nfsargs.h (revision 17761) @@ -1,603 +1,601 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.16 1995/12/17 21:12:05 phk Exp $ + * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ #include /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */ #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFS_GATHERDELAY #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ #endif #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ /* * Oddballs */ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) /* * XXX * sys/buf.h should be edited to change B_APPENDWRITE --> B_NEEDCOMMIT, but * until then... * Same goes for sys/malloc.h, which needs M_NFSDIROFF, * M_NFSRVDESC and M_NFSBIGFH added. * The VA_EXCLUSIVE flag should be added for va_vaflags and set for an * exclusive create. * The B_INVAFTERWRITE flag should be set to whatever is required by the * buffer cache code to say "Invalidate the block after it is written back". 
*/ #ifndef B_NEEDCOMMIT #define B_NEEDCOMMIT B_APPENDWRITE #endif #ifndef M_NFSRVDESC #define M_NFSRVDESC M_TEMP #endif #ifndef M_NFSDIROFF #define M_NFSDIROFF M_TEMP #endif #ifndef M_NFSBIGFH #define M_NFSBIGFH M_TEMP #endif #ifndef VA_EXCLUSIVE #define VA_EXCLUSIVE 0 #endif #ifdef __FreeBSD__ #define B_INVAFTERWRITE B_NOCACHE #else #define B_INVAFTERWRITE B_INVAL #endif /* * These ifdefs try to handle the differences between the various 4.4BSD-Lite * based vfs interfaces. * btw: NetBSD-current does have a VOP_LEASE(), but I don't know how to * differentiate between NetBSD-1.0 and NetBSD-current, so.. * I also don't know about BSDi's 2.0 release. */ #if !defined(HAS_VOPLEASE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPLEASE 1 #endif #if !defined(HAS_VOPREVOKE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPREVOKE 1 #endif /* * The IO_METASYNC flag should be implemented for local file systems. * (Until then, it is nothing at all.) */ #ifndef IO_METASYNC #define IO_METASYNC 0 #endif /* * Set the attribute timeout based on how recently the file has been modified. */ #define NFS_ATTRTIMEO(np) \ ((((np)->n_flag & NMODIFIED) || \ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ (time.tv_sec - (np)->n_mtime) / 10)) /* * Expected allocation sizes for major data structures. If the actual size * of the structure exceeds these sizes, then malloc() will be allocating * almost twice the memory required. This is used in nfs_init() to warn * the sysadmin that the size of a structure should be reduced. * (These sizes are always a power of 2. If the kernel malloc() changes * to one that does not allocate space in powers of 2 size, then this all * becomes bunk!) */ #define NFS_NODEALLOC 256 #define NFS_MNTALLOC 512 #define NFS_SVCALLOC 256 #define NFS_UIDALLOC 128 /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try and use it. */ struct nfsd_args { int sock; /* Socket to serve */ caddr_t name; /* Client address for connection based sockets */ int namelen; /* Length of name */ }; struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ u_long nsd_haddr; /* Ip address of client */ struct ucred nsd_cr; /* Cred.
uid maps to */ int nsd_authlen; /* Length of auth string (ret) */ u_char *nsd_authstr; /* Auth string (ret) */ int nsd_verflen; /* and the verifier */ u_char *nsd_verfstr; struct timeval nsd_timestamp; /* timestamp from verifier */ u_long nsd_ttl; /* credential ttl (sec) */ NFSKERBKEY_T nsd_key; /* Session key */ }; struct nfsd_cargs { char *ncd_dirp; /* Mount dir path */ uid_t ncd_authuid; /* Effective uid */ int ncd_authtype; /* Type of authenticator */ int ncd_authlen; /* Length of authenticator string */ u_char *ncd_authstr; /* Authenticator string */ int ncd_verflen; /* and the verifier */ u_char *ncd_verfstr; NFSKERBKEY_T ncd_key; /* Session key */ }; /* * Stats structure */ struct nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFS_NPROCS]; int rpcretries; int srvrpccnt[NFS_NPROCS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvnqnfs_leases; int srvnqnfs_maxleases; int srvnqnfs_getleases; int srvvop_writes; }; /* * Flags for nfssvc() system call. */ #define NFSSVC_BIOD 0x002 #define NFSSVC_NFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_AUTHIN 0x010 #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(KERNEL) || defined(_KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) /* * Socket errors ignored for connectionless sockets??
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ int r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ struct proc *r_procp; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_INTR 0x08 /* intr mnt, signal pending */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ /* * A list of nfssvc_sock structures is maintained with all the sockets * that require service by the nfsd. * The nfsuid structs hang off of the nfssvc_sock structs in both lru * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) #ifndef NFS_WDELAYHASHSIZ #define NFS_WDELAYHASHSIZ 16 /* and with this */ #endif #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ #define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) #define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) /* * Network address hash list element */ union nethostaddr { u_long had_inetaddr; struct mbuf *had_nam; }; struct nfsuid { TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ struct ucred nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. timestamp */ u_long nu_nickname; /* Nickname on server */ NFSKERBKEY_T nu_key; /* and session key */ }; #define nu_inetaddr nu_haddr.had_inetaddr #define nu_nam nu_haddr.had_nam /* Bits for nu_flag */ #define NU_INETADDR 0x1 #define NU_NAM 0x2 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ TAILQ_HEAD(, nfsuid) ns_uidlruhead; struct file *ns_fp; struct socket *ns_so; struct mbuf *ns_nam; struct mbuf *ns_raw; struct mbuf *ns_rawend; struct mbuf *ns_rec; struct mbuf *ns_recend; struct mbuf *ns_frag; int ns_flag; int ns_solock; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; }; /* Bits for "ns_flag" */ #define SLP_VALID 0x01 #define SLP_DOREC 0x02 #define SLP_NEEDQ 0x04 #define SLP_DISCONN 0x08 #define SLP_GETSTREAM 0x10 #define SLP_LASTFRAG 0x20 #define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; extern int nfssvc_sockhead_flag; #define SLP_INIT 0x01 #define SLP_WANTINIT 0x02 /* * One of these structures is allocated for each nfsd. */ struct nfsd { TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ int nfsd_flag; /* NFSD_ flags */ struct nfssvc_sock *nfsd_slp; /* Current socket */ int nfsd_authlen; /* Authenticator len */ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; struct proc *nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; /* Bits for "nfsd_flag" */ #define NFSD_WAITING 0x01 #define NFSD_REQINPROG 0x02 #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. */ struct nfsrv_descript { u_quad_t nd_time; /* Write deadline (usec) */ off_t nd_off; /* Start byte offset */ off_t nd_eoff; /* and end byte offset */ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_nam; /* and socket addr */ struct mbuf *nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ int nd_procnum; /* RPC # */ int nd_stable; /* storage type */ int nd_flag; /* nd_flag */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ #define ND_READ LEASE_READ #define ND_WRITE LEASE_WRITE #define ND_CHECK 0x04 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 #define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; extern int nfsd_head_flag; #define NFSD_CHECKSLP 0x01 /* * These macros compare nfsrv_descript structures. 
*/ #define NFSW_CONTIG(o, n) \ ((o)->nd_eoff >= (n)->nd_off && \ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ sizeof (struct ucred))) int nfs_reply __P((struct nfsreq *)); int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); int nfs_send __P((struct socket *,struct mbuf *,struct mbuf *,struct nfsreq *)); int nfs_rephead __P((int,struct nfsrv_descript *,struct nfssvc_sock *,int,int,u_quad_t *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_sndlock __P((int *,struct nfsreq *)); void nfs_sndunlock __P((int *flagp)); int nfs_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); int nfs_vinvalbuf __P((struct vnode *,int,struct ucred *,struct proc *,int)); int nfs_readrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_writerpc __P((struct vnode *,struct uio *,struct ucred *,int *,int *)); int nfs_readdirrpc __P((register struct vnode *,struct uio *,struct ucred *)); int nfs_asyncio __P((struct buf *,struct ucred *)); int nfs_doio __P((struct buf *,struct ucred *,struct proc *)); int nfs_readlinkrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_sigintr __P((struct nfsmount *,struct nfsreq *r,struct proc *)); int nfs_readdirplusrpc __P((struct vnode *,register struct uio *,struct ucred *)); int nfsm_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); void nfsm_srvfattr __P((struct nfsrv_descript *,struct vattr *,struct nfs_fattr *)); void nfsm_srvwcc __P((struct nfsrv_descript *,int,struct vattr *,int,struct vattr *,struct mbuf **,char **)); void nfsm_srvpostopattr __P((struct nfsrv_descript *,int,struct vattr *,struct mbuf **,char **)); int netaddr_match __P((int,union nethostaddr *,struct mbuf *)); int nfs_request __P((struct vnode *,struct mbuf *,int,struct proc *,struct ucred *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_loadattrcache __P((struct vnode **,struct mbuf **,caddr_t *,struct vattr *)); int nfs_namei __P((struct nameidata *,fhandle_t *,int,struct nfssvc_sock *,struct mbuf *,struct mbuf **,caddr_t *,struct vnode **,struct proc *,int)); void nfsm_adj __P((struct mbuf *,int,int)); int nfsm_mbuftouio __P((struct mbuf **,struct uio *,int,caddr_t *)); void nfsrv_initcache __P((void)); int nfs_getauth __P((struct nfsmount *,struct nfsreq *,struct ucred *,char **,int *,char *,int *,NFSKERBKEY_T)); int nfs_getnickauth __P((struct nfsmount *,struct ucred *,char **,int *,char *,int)); int nfs_savenickauth __P((struct nfsmount *,struct ucred *,int,NFSKERBKEY_T,struct mbuf **,char **,struct mbuf *)); int nfs_adv __P((struct mbuf **,caddr_t *,int,int)); void nfs_nhinit __P((void)); void nfs_timer __P((void*)); u_long nfs_hash __P((nfsfh_t *,int)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); int nfsrv_dorec __P((struct nfssvc_sock *,struct nfsd *,struct nfsrv_descript **)); void nfsrv_cleancache __P((void)); int nfsrv_getcache __P((struct nfsrv_descript *,struct nfssvc_sock *,struct mbuf **)); int nfs_init __P((void)); void nfsrv_updatecache __P((struct nfsrv_descript *,int,struct mbuf *)); int nfs_connect __P((struct nfsmount *,struct nfsreq *)); void nfs_disconnect __P((struct nfsmount *nmp)); int nfs_getattrcache __P((struct vnode *,struct vattr *)); int nfsm_strtmbuf __P((struct mbuf **,char **,char *,long)); int nfs_bioread __P((struct vnode *,struct uio *,int,struct ucred *)); int nfsm_uiotombuf __P((struct uio *,struct mbuf **,int,caddr_t *)); void nfsrv_init 
__P((int)); void nfs_clearcommit __P((struct mount *)); int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrvw_sort __P((gid_t [],int)); void nfsrv_setcred __P((struct ucred *,struct ucred *)); int nfs_writebp __P((struct buf *,int)); -int nfsrv_vput __P(( struct vnode * )); -int nfsrv_vrele __P(( struct vnode * )); -int nfsrv_vmio __P(( struct vnode * )); +int nfsrv_object_create __P(( struct vnode * )); void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, struct proc *, struct mbuf **)); int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, struct proc *p)); int nfsrv3_access __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_fhtovp __P((fhandle_t *,int,struct vnode **, struct ucred *,struct nfssvc_sock *,struct mbuf *, int *,int)); int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_setattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_statfs __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_symlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); 
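/*
 * Illustrative sketch, not from the original source: this revision drops
 * the nfsrv_vput()/nfsrv_vrele()/nfsrv_vmio() wrapper prototypes in favor
 * of one helper, nfsrv_object_create().  The nfs_serv.c hunks later in
 * this diff show callers switching from nfsrv_vput(vp) to vput(vp) and
 * from nfsrv_vrele(dirp) to vrele(dirp); the remaining VM-object setup
 * presumably moves into nfsrv_object_create().  The calling pattern below
 * is a hedged illustration only, not code from this change.
 */
#if 0
	/* old pattern: wrappers paired vnode release with VM handling */
	nfsrv_vmio(vp);			/* attach a VM object (assumed) */
	nfsrv_vput(vp);			/* wrapper around unlock + release */

	/* new pattern: stock vnode primitives plus one explicit helper */
	nfsrv_object_create(vp);	/* attach a VM object (assumed) */
	vput(vp);			/* standard vnode unlock + release */
#endif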
#endif /* KERNEL */ #endif Index: head/sys/nfsclient/nfsstats.h =================================================================== --- head/sys/nfsclient/nfsstats.h (revision 17760) +++ head/sys/nfsclient/nfsstats.h (revision 17761) @@ -1,603 +1,601 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.16 1995/12/17 21:12:05 phk Exp $ + * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ #include /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #define NFS_MAXASYNCDAEMON 20 /* Max. 
number async_daemons runnable */ #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFS_GATHERDELAY #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ #endif #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ /* * Oddballs */ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) /* * XXX * sys/buf.h should be edited to change B_APPENDWRITE --> B_NEEDCOMMIT, but * until then... * Same goes for sys/malloc.h, which needs M_NFSDIROFF, * M_NFSRVDESC and M_NFSBIGFH added. * The VA_EXCLUSIVE flag should be added for va_vaflags and set for an * exclusive create. * The B_INVAFTERWRITE flag should be set to whatever is required by the * buffer cache code to say "Invalidate the block after it is written back". */ #ifndef B_NEEDCOMMIT #define B_NEEDCOMMIT B_APPENDWRITE #endif #ifndef M_NFSRVDESC #define M_NFSRVDESC M_TEMP #endif #ifndef M_NFSDIROFF #define M_NFSDIROFF M_TEMP #endif #ifndef M_NFSBIGFH #define M_NFSBIGFH M_TEMP #endif #ifndef VA_EXCLUSIVE #define VA_EXCLUSIVE 0 #endif #ifdef __FreeBSD__ #define B_INVAFTERWRITE B_NOCACHE #else #define B_INVAFTERWRITE B_INVAL #endif /* * These ifdefs try to handle the differences between the various 4.4BSD-Lite * based vfs interfaces. * btw: NetBSD-current does have a VOP_LEASE(), but I don't know how to * differentiate between NetBSD-1.0 and NetBSD-current, so.. * I also don't know about BSDi's 2.0 release. */ #if !defined(HAS_VOPLEASE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPLEASE 1 #endif #if !defined(HAS_VOPREVOKE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPREVOKE 1 #endif /* * The IO_METASYNC flag should be implemented for local file systems. * (Until then, it is nothing at all.) */ #ifndef IO_METASYNC #define IO_METASYNC 0 #endif /* * Set the attribute timeout based on how recently the file has been modified. */ #define NFS_ATTRTIMEO(np) \ ((((np)->n_flag & NMODIFIED) || \ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ (time.tv_sec - (np)->n_mtime) / 10)) /* * Expected allocation sizes for major data structures. If the actual size * of the structure exceeds these sizes, then malloc() will be allocating * almost twice the memory required. This is used in nfs_init() to warn * the sysadmin that the size of a structure should be reduced. * (These sizes are always a power of 2. If the kernel malloc() changes * to one that does not allocate space in powers of 2 size, then this all * becomes bunk!) */ #define NFS_NODEALLOC 256 #define NFS_MNTALLOC 512 #define NFS_SVCALLOC 256 #define NFS_UIDALLOC 128 /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try and use it. */ struct nfsd_args { int sock; /* Socket to serve */ caddr_t name; /* Client address for connection based sockets */ int namelen; /* Length of name */ }; struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ u_long nsd_haddr; /* Ip address of client */ struct ucred nsd_cr; /* Cred.
uid maps to */ int nsd_authlen; /* Length of auth string (ret) */ u_char *nsd_authstr; /* Auth string (ret) */ int nsd_verflen; /* and the verifier */ u_char *nsd_verfstr; struct timeval nsd_timestamp; /* timestamp from verifier */ u_long nsd_ttl; /* credential ttl (sec) */ NFSKERBKEY_T nsd_key; /* Session key */ }; struct nfsd_cargs { char *ncd_dirp; /* Mount dir path */ uid_t ncd_authuid; /* Effective uid */ int ncd_authtype; /* Type of authenticator */ int ncd_authlen; /* Length of authenticator string */ u_char *ncd_authstr; /* Authenticator string */ int ncd_verflen; /* and the verifier */ u_char *ncd_verfstr; NFSKERBKEY_T ncd_key; /* Session key */ }; /* * Stats structure */ struct nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFS_NPROCS]; int rpcretries; int srvrpccnt[NFS_NPROCS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvnqnfs_leases; int srvnqnfs_maxleases; int srvnqnfs_getleases; int srvvop_writes; }; /* * Flags for nfssvc() system call. */ #define NFSSVC_BIOD 0x002 #define NFSSVC_NFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_AUTHIN 0x010 #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(KERNEL) || defined(_KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) /* * Socket errors ignored for connectionless sockets??
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ int r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ struct proc *r_procp; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_INTR 0x08 /* intr mnt, signal pending */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ /* * A list of nfssvc_sock structures is maintained with all the sockets * that require service by the nfsd. * The nfsuid structs hang off of the nfssvc_sock structs in both lru * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) #ifndef NFS_WDELAYHASHSIZ #define NFS_WDELAYHASHSIZ 16 /* and with this */ #endif #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ #define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) #define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) /* * Network address hash list element */ union nethostaddr { u_long had_inetaddr; struct mbuf *had_nam; }; struct nfsuid { TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ struct ucred nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. timestamp */ u_long nu_nickname; /* Nickname on server */ NFSKERBKEY_T nu_key; /* and session key */ }; #define nu_inetaddr nu_haddr.had_inetaddr #define nu_nam nu_haddr.had_nam /* Bits for nu_flag */ #define NU_INETADDR 0x1 #define NU_NAM 0x2 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ TAILQ_HEAD(, nfsuid) ns_uidlruhead; struct file *ns_fp; struct socket *ns_so; struct mbuf *ns_nam; struct mbuf *ns_raw; struct mbuf *ns_rawend; struct mbuf *ns_rec; struct mbuf *ns_recend; struct mbuf *ns_frag; int ns_flag; int ns_solock; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; }; /* Bits for "ns_flag" */ #define SLP_VALID 0x01 #define SLP_DOREC 0x02 #define SLP_NEEDQ 0x04 #define SLP_DISCONN 0x08 #define SLP_GETSTREAM 0x10 #define SLP_LASTFRAG 0x20 #define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; extern int nfssvc_sockhead_flag; #define SLP_INIT 0x01 #define SLP_WANTINIT 0x02 /* * One of these structures is allocated for each nfsd. */ struct nfsd { TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ int nfsd_flag; /* NFSD_ flags */ struct nfssvc_sock *nfsd_slp; /* Current socket */ int nfsd_authlen; /* Authenticator len */ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; struct proc *nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; /* Bits for "nfsd_flag" */ #define NFSD_WAITING 0x01 #define NFSD_REQINPROG 0x02 #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. */ struct nfsrv_descript { u_quad_t nd_time; /* Write deadline (usec) */ off_t nd_off; /* Start byte offset */ off_t nd_eoff; /* and end byte offset */ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_nam; /* and socket addr */ struct mbuf *nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ int nd_procnum; /* RPC # */ int nd_stable; /* storage type */ int nd_flag; /* nd_flag */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ #define ND_READ LEASE_READ #define ND_WRITE LEASE_WRITE #define ND_CHECK 0x04 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 #define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; extern int nfsd_head_flag; #define NFSD_CHECKSLP 0x01 /* * These macros compare nfsrv_descript structures. 
*/ #define NFSW_CONTIG(o, n) \ ((o)->nd_eoff >= (n)->nd_off && \ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ sizeof (struct ucred))) int nfs_reply __P((struct nfsreq *)); int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); int nfs_send __P((struct socket *,struct mbuf *,struct mbuf *,struct nfsreq *)); int nfs_rephead __P((int,struct nfsrv_descript *,struct nfssvc_sock *,int,int,u_quad_t *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_sndlock __P((int *,struct nfsreq *)); void nfs_sndunlock __P((int *flagp)); int nfs_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); int nfs_vinvalbuf __P((struct vnode *,int,struct ucred *,struct proc *,int)); int nfs_readrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_writerpc __P((struct vnode *,struct uio *,struct ucred *,int *,int *)); int nfs_readdirrpc __P((register struct vnode *,struct uio *,struct ucred *)); int nfs_asyncio __P((struct buf *,struct ucred *)); int nfs_doio __P((struct buf *,struct ucred *,struct proc *)); int nfs_readlinkrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_sigintr __P((struct nfsmount *,struct nfsreq *r,struct proc *)); int nfs_readdirplusrpc __P((struct vnode *,register struct uio *,struct ucred *)); int nfsm_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); void nfsm_srvfattr __P((struct nfsrv_descript *,struct vattr *,struct nfs_fattr *)); void nfsm_srvwcc __P((struct nfsrv_descript *,int,struct vattr *,int,struct vattr *,struct mbuf **,char **)); void nfsm_srvpostopattr __P((struct nfsrv_descript *,int,struct vattr *,struct mbuf **,char **)); int netaddr_match __P((int,union nethostaddr *,struct mbuf *)); int nfs_request __P((struct vnode *,struct mbuf *,int,struct proc *,struct ucred *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_loadattrcache __P((struct vnode **,struct mbuf **,caddr_t *,struct vattr *)); int nfs_namei __P((struct nameidata *,fhandle_t *,int,struct nfssvc_sock *,struct mbuf *,struct mbuf **,caddr_t *,struct vnode **,struct proc *,int)); void nfsm_adj __P((struct mbuf *,int,int)); int nfsm_mbuftouio __P((struct mbuf **,struct uio *,int,caddr_t *)); void nfsrv_initcache __P((void)); int nfs_getauth __P((struct nfsmount *,struct nfsreq *,struct ucred *,char **,int *,char *,int *,NFSKERBKEY_T)); int nfs_getnickauth __P((struct nfsmount *,struct ucred *,char **,int *,char *,int)); int nfs_savenickauth __P((struct nfsmount *,struct ucred *,int,NFSKERBKEY_T,struct mbuf **,char **,struct mbuf *)); int nfs_adv __P((struct mbuf **,caddr_t *,int,int)); void nfs_nhinit __P((void)); void nfs_timer __P((void*)); u_long nfs_hash __P((nfsfh_t *,int)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); int nfsrv_dorec __P((struct nfssvc_sock *,struct nfsd *,struct nfsrv_descript **)); void nfsrv_cleancache __P((void)); int nfsrv_getcache __P((struct nfsrv_descript *,struct nfssvc_sock *,struct mbuf **)); int nfs_init __P((void)); void nfsrv_updatecache __P((struct nfsrv_descript *,int,struct mbuf *)); int nfs_connect __P((struct nfsmount *,struct nfsreq *)); void nfs_disconnect __P((struct nfsmount *nmp)); int nfs_getattrcache __P((struct vnode *,struct vattr *)); int nfsm_strtmbuf __P((struct mbuf **,char **,char *,long)); int nfs_bioread __P((struct vnode *,struct uio *,int,struct ucred *)); int nfsm_uiotombuf __P((struct uio *,struct mbuf **,int,caddr_t *)); void nfsrv_init 
__P((int)); void nfs_clearcommit __P((struct mount *)); int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrvw_sort __P((gid_t [],int)); void nfsrv_setcred __P((struct ucred *,struct ucred *)); int nfs_writebp __P((struct buf *,int)); -int nfsrv_vput __P(( struct vnode * )); -int nfsrv_vrele __P(( struct vnode * )); -int nfsrv_vmio __P(( struct vnode * )); +int nfsrv_object_create __P(( struct vnode * )); void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, struct proc *, struct mbuf **)); int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, struct proc *p)); int nfsrv3_access __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_fhtovp __P((fhandle_t *,int,struct vnode **, struct ucred *,struct nfssvc_sock *,struct mbuf *, int *,int)); int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_setattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_statfs __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_symlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); 
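/*
 * Illustrative sketch, not from the original source: NFSW_CONTIG() and
 * NFSW_SAMECRED() above are the two tests the write-gathering path applies
 * before merging queued writes: same file handle with the new request
 * starting at or before where the old one ends, and identical credentials.
 * The descriptor names owp and wp are hypothetical; the actual merge is
 * performed by nfsrvw_coalesce(), declared static in nfs_serv.c.
 */
#if 0
	struct nfsrv_descript *owp, *wp;

	if (NFSW_SAMECRED(owp, wp) && NFSW_CONTIG(owp, wp))
		nfsrvw_coalesce(owp, wp);	/* fold wp into owp's nd_coalesce list */
#endif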
#endif /* KERNEL */ #endif Index: head/sys/nfsserver/nfs.h =================================================================== --- head/sys/nfsserver/nfs.h (revision 17760) +++ head/sys/nfsserver/nfs.h (revision 17761) @@ -1,603 +1,601 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.16 1995/12/17 21:12:05 phk Exp $ + * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ #include /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #define NFS_MAXASYNCDAEMON 20 /* Max. 
number async_daemons runnable */ #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFS_GATHERDELAY #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ #endif #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ /* * Oddballs */ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) /* * XXX * sys/buf.h should be edited to change B_APPENDWRITE --> B_NEEDCOMMIT, but * until then... * Same goes for sys/malloc.h, which needs M_NFSDIROFF, * M_NFSRVDESC and M_NFSBIGFH added. * The VA_EXCLUSIVE flag should be added for va_vaflags and set for an * exclusive create. * The B_INVAFTERWRITE flag should be set to whatever is required by the * buffer cache code to say "Invalidate the block after it is written back". */ #ifndef B_NEEDCOMMIT #define B_NEEDCOMMIT B_APPENDWRITE #endif #ifndef M_NFSRVDESC #define M_NFSRVDESC M_TEMP #endif #ifndef M_NFSDIROFF #define M_NFSDIROFF M_TEMP #endif #ifndef M_NFSBIGFH #define M_NFSBIGFH M_TEMP #endif #ifndef VA_EXCLUSIVE #define VA_EXCLUSIVE 0 #endif #ifdef __FreeBSD__ #define B_INVAFTERWRITE B_NOCACHE #else #define B_INVAFTERWRITE B_INVAL #endif /* * These ifdefs try to handle the differences between the various 4.4BSD-Lite * based vfs interfaces. * btw: NetBSD-current does have a VOP_LEASE(), but I don't know how to * differentiate between NetBSD-1.0 and NetBSD-current, so.. * I also don't know about BSDi's 2.0 release. */ #if !defined(HAS_VOPLEASE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPLEASE 1 #endif #if !defined(HAS_VOPREVOKE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPREVOKE 1 #endif /* * The IO_METASYNC flag should be implemented for local file systems. * (Until then, it is nothing at all.) */ #ifndef IO_METASYNC #define IO_METASYNC 0 #endif /* * Set the attribute timeout based on how recently the file has been modified. */ #define NFS_ATTRTIMEO(np) \ ((((np)->n_flag & NMODIFIED) || \ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ (time.tv_sec - (np)->n_mtime) / 10)) /* * Expected allocation sizes for major data structures. If the actual size * of the structure exceeds these sizes, then malloc() will be allocating * almost twice the memory required. This is used in nfs_init() to warn * the sysadmin that the size of a structure should be reduced. * (These sizes are always a power of 2. If the kernel malloc() changes * to one that does not allocate space in powers of 2 size, then this all * becomes bunk!) */ #define NFS_NODEALLOC 256 #define NFS_MNTALLOC 512 #define NFS_SVCALLOC 256 #define NFS_UIDALLOC 128 /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try and use it. */ struct nfsd_args { int sock; /* Socket to serve */ caddr_t name; /* Client address for connection based sockets */ int namelen; /* Length of name */ }; struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ u_long nsd_haddr; /* Ip address of client */ struct ucred nsd_cr; /* Cred.
uid maps to */ int nsd_authlen; /* Length of auth string (ret) */ u_char *nsd_authstr; /* Auth string (ret) */ int nsd_verflen; /* and the verifier */ u_char *nsd_verfstr; struct timeval nsd_timestamp; /* timestamp from verifier */ u_long nsd_ttl; /* credential ttl (sec) */ NFSKERBKEY_T nsd_key; /* Session key */ }; struct nfsd_cargs { char *ncd_dirp; /* Mount dir path */ uid_t ncd_authuid; /* Effective uid */ int ncd_authtype; /* Type of authenticator */ int ncd_authlen; /* Length of authenticator string */ u_char *ncd_authstr; /* Authenticator string */ int ncd_verflen; /* and the verifier */ u_char *ncd_verfstr; NFSKERBKEY_T ncd_key; /* Session key */ }; /* * Stats structure */ struct nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFS_NPROCS]; int rpcretries; int srvrpccnt[NFS_NPROCS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvnqnfs_leases; int srvnqnfs_maxleases; int srvnqnfs_getleases; int srvvop_writes; }; /* * Flags for nfssvc() system call. */ #define NFSSVC_BIOD 0x002 #define NFSSVC_NFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_AUTHIN 0x010 #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(KERNEL) || defined(_KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) /* * Socket errors ignored for connectionless sockets??
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ int r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ struct proc *r_procp; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_INTR 0x08 /* intr mnt, signal pending */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ /* * A list of nfssvc_sock structures is maintained with all the sockets * that require service by the nfsd. * The nfsuid structs hang off of the nfssvc_sock structs in both lru * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) #ifndef NFS_WDELAYHASHSIZ #define NFS_WDELAYHASHSIZ 16 /* and with this */ #endif #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ #define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) #define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) /* * Network address hash list element */ union nethostaddr { u_long had_inetaddr; struct mbuf *had_nam; }; struct nfsuid { TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ struct ucred nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. timestamp */ u_long nu_nickname; /* Nickname on server */ NFSKERBKEY_T nu_key; /* and session key */ }; #define nu_inetaddr nu_haddr.had_inetaddr #define nu_nam nu_haddr.had_nam /* Bits for nu_flag */ #define NU_INETADDR 0x1 #define NU_NAM 0x2 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ TAILQ_HEAD(, nfsuid) ns_uidlruhead; struct file *ns_fp; struct socket *ns_so; struct mbuf *ns_nam; struct mbuf *ns_raw; struct mbuf *ns_rawend; struct mbuf *ns_rec; struct mbuf *ns_recend; struct mbuf *ns_frag; int ns_flag; int ns_solock; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; }; /* Bits for "ns_flag" */ #define SLP_VALID 0x01 #define SLP_DOREC 0x02 #define SLP_NEEDQ 0x04 #define SLP_DISCONN 0x08 #define SLP_GETSTREAM 0x10 #define SLP_LASTFRAG 0x20 #define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; extern int nfssvc_sockhead_flag; #define SLP_INIT 0x01 #define SLP_WANTINIT 0x02 /* * One of these structures is allocated for each nfsd. */ struct nfsd { TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ int nfsd_flag; /* NFSD_ flags */ struct nfssvc_sock *nfsd_slp; /* Current socket */ int nfsd_authlen; /* Authenticator len */ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; struct proc *nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; /* Bits for "nfsd_flag" */ #define NFSD_WAITING 0x01 #define NFSD_REQINPROG 0x02 #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. */ struct nfsrv_descript { u_quad_t nd_time; /* Write deadline (usec) */ off_t nd_off; /* Start byte offset */ off_t nd_eoff; /* and end byte offset */ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_nam; /* and socket addr */ struct mbuf *nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ int nd_procnum; /* RPC # */ int nd_stable; /* storage type */ int nd_flag; /* nd_flag */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ #define ND_READ LEASE_READ #define ND_WRITE LEASE_WRITE #define ND_CHECK 0x04 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 #define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; extern int nfsd_head_flag; #define NFSD_CHECKSLP 0x01 /* * These macros compare nfsrv_descript structures. 
*/ #define NFSW_CONTIG(o, n) \ ((o)->nd_eoff >= (n)->nd_off && \ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ sizeof (struct ucred))) int nfs_reply __P((struct nfsreq *)); int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); int nfs_send __P((struct socket *,struct mbuf *,struct mbuf *,struct nfsreq *)); int nfs_rephead __P((int,struct nfsrv_descript *,struct nfssvc_sock *,int,int,u_quad_t *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_sndlock __P((int *,struct nfsreq *)); void nfs_sndunlock __P((int *flagp)); int nfs_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); int nfs_vinvalbuf __P((struct vnode *,int,struct ucred *,struct proc *,int)); int nfs_readrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_writerpc __P((struct vnode *,struct uio *,struct ucred *,int *,int *)); int nfs_readdirrpc __P((register struct vnode *,struct uio *,struct ucred *)); int nfs_asyncio __P((struct buf *,struct ucred *)); int nfs_doio __P((struct buf *,struct ucred *,struct proc *)); int nfs_readlinkrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_sigintr __P((struct nfsmount *,struct nfsreq *r,struct proc *)); int nfs_readdirplusrpc __P((struct vnode *,register struct uio *,struct ucred *)); int nfsm_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); void nfsm_srvfattr __P((struct nfsrv_descript *,struct vattr *,struct nfs_fattr *)); void nfsm_srvwcc __P((struct nfsrv_descript *,int,struct vattr *,int,struct vattr *,struct mbuf **,char **)); void nfsm_srvpostopattr __P((struct nfsrv_descript *,int,struct vattr *,struct mbuf **,char **)); int netaddr_match __P((int,union nethostaddr *,struct mbuf *)); int nfs_request __P((struct vnode *,struct mbuf *,int,struct proc *,struct ucred *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_loadattrcache __P((struct vnode **,struct mbuf **,caddr_t *,struct vattr *)); int nfs_namei __P((struct nameidata *,fhandle_t *,int,struct nfssvc_sock *,struct mbuf *,struct mbuf **,caddr_t *,struct vnode **,struct proc *,int)); void nfsm_adj __P((struct mbuf *,int,int)); int nfsm_mbuftouio __P((struct mbuf **,struct uio *,int,caddr_t *)); void nfsrv_initcache __P((void)); int nfs_getauth __P((struct nfsmount *,struct nfsreq *,struct ucred *,char **,int *,char *,int *,NFSKERBKEY_T)); int nfs_getnickauth __P((struct nfsmount *,struct ucred *,char **,int *,char *,int)); int nfs_savenickauth __P((struct nfsmount *,struct ucred *,int,NFSKERBKEY_T,struct mbuf **,char **,struct mbuf *)); int nfs_adv __P((struct mbuf **,caddr_t *,int,int)); void nfs_nhinit __P((void)); void nfs_timer __P((void*)); u_long nfs_hash __P((nfsfh_t *,int)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); int nfsrv_dorec __P((struct nfssvc_sock *,struct nfsd *,struct nfsrv_descript **)); void nfsrv_cleancache __P((void)); int nfsrv_getcache __P((struct nfsrv_descript *,struct nfssvc_sock *,struct mbuf **)); int nfs_init __P((void)); void nfsrv_updatecache __P((struct nfsrv_descript *,int,struct mbuf *)); int nfs_connect __P((struct nfsmount *,struct nfsreq *)); void nfs_disconnect __P((struct nfsmount *nmp)); int nfs_getattrcache __P((struct vnode *,struct vattr *)); int nfsm_strtmbuf __P((struct mbuf **,char **,char *,long)); int nfs_bioread __P((struct vnode *,struct uio *,int,struct ucred *)); int nfsm_uiotombuf __P((struct uio *,struct mbuf **,int,caddr_t *)); void nfsrv_init 
__P((int)); void nfs_clearcommit __P((struct mount *)); int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrvw_sort __P((gid_t [],int)); void nfsrv_setcred __P((struct ucred *,struct ucred *)); int nfs_writebp __P((struct buf *,int)); -int nfsrv_vput __P(( struct vnode * )); -int nfsrv_vrele __P(( struct vnode * )); -int nfsrv_vmio __P(( struct vnode * )); +int nfsrv_object_create __P(( struct vnode * )); void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, struct proc *, struct mbuf **)); int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, struct proc *p)); int nfsrv3_access __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_fhtovp __P((fhandle_t *,int,struct vnode **, struct ucred *,struct nfssvc_sock *,struct mbuf *, int *,int)); int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_setattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_statfs __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_symlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); 
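/*
 * Illustrative sketch, not from the original source: the NFS_ATTRTIMEO()
 * macro defined earlier in this header scales the attribute cache timeout
 * to one tenth of the file's age since last modification, clamped to
 * [NFS_MINATTRTIMO, NFS_MAXATTRTIMO], with locally modified files pinned
 * to the minimum.  Worked examples with the default 5/60 second bounds:
 * age 30s gives 3, below the minimum, so 5s; age 300s gives 30s; age
 * 1000s gives 100, above the maximum, so 60s.  The use below is
 * hypothetical; np is taken to be a struct nfsnode pointer, inferred
 * from the n_flag and n_mtime fields the macro references.
 */
#if 0
	int timeo;

	timeo = NFS_ATTRTIMEO(np);	/* adaptive attr cache timeout (sec) */
#endif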
#endif /* KERNEL */ #endif Index: head/sys/nfsserver/nfs_serv.c =================================================================== --- head/sys/nfsserver/nfs_serv.c (revision 17760) +++ head/sys/nfsserver/nfs_serv.c (revision 17761) @@ -1,3437 +1,3417 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94 - * $Id: nfs_serv.c,v 1.29 1996/04/30 23:23:07 bde Exp $ + * $Id: nfs_serv.c,v 1.30 1996/06/08 12:16:26 bde Exp $ */ /* * nfs version 2 and 3 server calls to vnode ops * - these routines generally have 3 phases * 1 - break down and validate rpc request in mbuf list * 2 - do the vnode ops for the request * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c) * 3 - build the rpc reply in an mbuf list * nb: * - do not mix the phases, since the nfsm_?? macros can return failures * on a bad rpc or similar and do not do any vrele() or vput()'s * * - the nfsm_reply() macro generates an nfs rpc reply with the nfs * error number iff error != 0 whereas * returning an error from the server function implies a fatal error * such as a badly constructed rpc request that should be dropped without * a reply. * For Version 3, nfsm_reply() does not return for the error case, since * most version 3 rpcs return more than the status for error cases. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; #ifndef NFS_NOSERVER nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; /* Global vars */ extern u_long nfs_xdrneg1; extern u_long nfs_false, nfs_true; extern enum vtype nv3tov_type[8]; extern struct nfsstats nfsstats; int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000; int nfs_async; SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, ""); static int nfsrv_access __P((struct vnode *,int,struct ucred *,int, struct proc *)); static void nfsrvw_coalesce __P((struct nfsrv_descript *, struct nfsrv_descript *)); /* * nfs v3 access service */ int nfsrv3_access(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret; char *cp2; struct mbuf *mb, *mreq, *mb2; struct vattr vattr, *vap = &vattr; u_long testmode, nfsmode; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } nfsmode = fxdr_unsigned(u_long, *tl); if ((nfsmode & NFSV3ACCESS_READ) && nfsrv_access(vp, VREAD, cred, rdonly, procp)) nfsmode &= ~NFSV3ACCESS_READ; if (vp->v_type == VDIR) testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); else testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if ((nfsmode & testmode) && nfsrv_access(vp, VWRITE, cred, rdonly, procp)) nfsmode &= ~testmode; if (vp->v_type == VDIR) testmode = NFSV3ACCESS_LOOKUP; else testmode = NFSV3ACCESS_EXECUTE; if ((nfsmode & testmode) && nfsrv_access(vp, VEXEC, cred, rdonly, procp)) nfsmode &= ~testmode; getret = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(1) + NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, vap); nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(nfsmode); nfsm_srvdone; } /* * nfs getattr service */ int nfsrv_getattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va; register struct vattr *vap = &va; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(0); return (0); } nqsrv_getl(vp, ND_READ); error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); if (error) return (0); nfsm_build(fp, struct nfs_fattr *, 
NFSX_FATTR(nfsd->nd_flag & ND_NFSV3)); nfsm_srvfillattr(vap, fp); nfsm_srvdone; } /* * nfs setattr service */ int nfsrv_setattr(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, preat; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register struct nfs_fattr *fp; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, preat_ret = 1, postat_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), gcheck = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev; struct timespec guard; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); gcheck = fxdr_unsigned(int, *tl); if (gcheck) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_nfsv3time(tl, &guard); } } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); /* * Nah nah nah nah na nah * There is a bug in the Sun client that puts 0xffff in the mode * field of sattr when it should put in 0xffffffff. The u_short * doesn't sign extend. * --> check the low order 2 bytes for 0xffff */ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff) vap->va_mode = nfstov_mode(sp->sa_mode); if (sp->sa_uid != nfs_xdrneg1) vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid); if (sp->sa_gid != nfs_xdrneg1) vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid); if (sp->sa_size != nfs_xdrneg1) vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size); if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) { #ifdef notyet fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime); #else vap->va_atime.ts_sec = fxdr_unsigned(long, sp->sa_atime.nfsv2_sec); vap->va_atime.ts_nsec = 0; #endif } if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1) fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime); } /* * Now that we have all the fields, let's do it. */ if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } nqsrv_getl(vp, ND_WRITE); if (v3) { error = preat_ret = VOP_GETATTR(vp, &preat, cred, procp); if (!error && gcheck && (preat.va_ctime.ts_sec != guard.ts_sec || preat.va_ctime.ts_nsec != guard.ts_nsec)) error = NFSERR_NOT_SYNC; if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } } /* * If the size is being changed, write access is required; otherwise * just check for a read-only file system.
*/ if (vap->va_size == ((u_quad_t)((quad_t) -1))) { if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto out; } } else { if (vp->v_type == VDIR) { error = EISDIR; goto out; } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly, procp)) goto out; } error = VOP_SETATTR(vp, vap, cred, procp); postat_ret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = postat_ret; out: - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_WCCORFATTR(v3)); if (v3) { nfsm_srvwcc_data(preat_ret, &preat, postat_ret, vap); return (0); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs lookup rpc */ int nfsrv_lookup(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct nameidata nd; struct vnode *vp, *dirp; nfsfh_t nfh; fhandle_t *fhp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirattr_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vattr va, dirattr, *vap = &va; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirattr_ret = VOP_GETATTR(dirp, &dirattr, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } if (error) { nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nqsrv_getl(nd.ni_startdir, ND_READ); - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPORFATTR(v3) + NFSX_POSTOPATTR(v3)); if (error) { nfsm_srvpostop_attr(dirattr_ret, &dirattr); return (0); } nfsm_srvfhtom(fhp, v3); if (v3) { nfsm_srvpostop_attr(0, vap); nfsm_srvpostop_attr(dirattr_ret, &dirattr); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * nfs readlink service */ int nfsrv_readlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN]; register struct iovec *ivp = iv; register struct mbuf *mp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, i, tlen, len, getret; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mp2, *mp3, *mreq; struct vnode *vp; struct vattr attr; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; u_quad_t frev; #ifndef nolint mp2 = mp3 = (struct mbuf *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); len = 0; i = 0; while (len < NFS_MAXPATHLEN) { MGET(mp, M_WAIT, MT_DATA); MCLGET(mp, M_WAIT); mp->m_len = NFSMSIZ(mp); if (len == 0) mp3 = mp2 = mp; else { mp2->m_next = mp; mp2 = mp; } if ((len+mp->m_len) > NFS_MAXPATHLEN) { mp->m_len = 
NFS_MAXPATHLEN-len; len = NFS_MAXPATHLEN; } else len += mp->m_len; ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; i++; ivp++; } uiop->uio_iov = iv; uiop->uio_iovcnt = i; uiop->uio_offset = 0; uiop->uio_resid = len; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { m_freem(mp3); nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VLNK) { if (v3) error = EINVAL; else error = ENXIO; goto out; } nqsrv_getl(vp, ND_READ); error = VOP_READLINK(vp, uiop, cred); out: getret = VOP_GETATTR(vp, &attr, cred, procp); - nfsrv_vput(vp); + vput(vp); if (error) m_freem(mp3); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &attr); if (error) return (0); } if (uiop->uio_resid > 0) { len -= uiop->uio_resid; tlen = nfsm_rndup(len); nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(len); mb->m_next = mp3; nfsm_srvdone; } /* * nfs read service */ int nfsrv_read(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *iv; struct iovec *iv2; register struct mbuf *m; register struct nfs_fattr *fp; register u_long *tl; register long t1; register int i; caddr_t bpos; int error = 0, rdonly, cache, cnt, len, left, siz, tlen, getret; int v3 = (nfsd->nd_flag & ND_NFSV3), reqlen; char *cp2; struct mbuf *mb, *mb2, *mreq; struct mbuf *m2; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; struct vattr va, *vap = &va; off_t off; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *tl); } nfsm_srvstrsiz(reqlen, NFS_SRVMAXDATA(nfsd)); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(1, (struct vattr *)0); return (0); } if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_READ); if (error = nfsrv_access(vp, VREAD, cred, rdonly, procp)) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } getret = VOP_GETATTR(vp, vap, cred, procp); if (!error) error = getret; if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } if (off >= vap->va_size) cnt = 0; else if ((off + reqlen) > vap->va_size) cnt = nfsm_rndup(vap->va_size - off); else cnt = reqlen; nfsm_reply(NFSX_POSTOPORFATTR(v3) + 3 * NFSX_UNSIGNED+nfsm_rndup(cnt)); if (v3) { nfsm_build(tl, u_long *, NFSX_V3FATTR + 4 * NFSX_UNSIGNED); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; tl += (NFSX_V3FATTR / sizeof (u_long)); } else { nfsm_build(tl, u_long *, NFSX_V2FATTR + NFSX_UNSIGNED); fp = (struct nfs_fattr *)tl; tl += (NFSX_V2FATTR / sizeof (u_long)); } len = left = cnt; if (cnt > 0) { /* * Generate the mbuf list with the uio_iov ref. to it. 
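* (Each iovec below is aimed at the unused trailing space of one mbuf in * the reply chain, so VOP_READ() deposits the data directly into the reply.)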
*/ i = 0; m = m2 = mb; while (left > 0) { siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { left -= siz; i++; } if (left > 0) { MGET(m, M_WAIT, MT_DATA); MCLGET(m, M_WAIT); m->m_len = 0; m2->m_next = m; m2 = m; } } MALLOC(iv, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv2 = iv; m = mb; left = cnt; i = 0; while (left > 0) { if (m == NULL) panic("nfsrv_read iov"); siz = min(M_TRAILINGSPACE(m), left); if (siz > 0) { iv->iov_base = mtod(m, caddr_t) + m->m_len; iv->iov_len = siz; m->m_len += siz; left -= siz; iv++; i++; } m = m->m_next; } uiop->uio_iovcnt = i; uiop->uio_offset = off; uiop->uio_resid = cnt; uiop->uio_rw = UIO_READ; uiop->uio_segflg = UIO_SYSSPACE; error = VOP_READ(vp, uiop, IO_NODELOCKED, cred); off = uiop->uio_offset; FREE((caddr_t)iv2, M_TEMP); if (error || (getret = VOP_GETATTR(vp, vap, cred, procp))) { if (!error) error = getret; m_freem(mreq); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, vap); return (0); } } else uiop->uio_resid = 0; - nfsrv_vput(vp); + vput(vp); nfsm_srvfillattr(vap, fp); len -= uiop->uio_resid; tlen = nfsm_rndup(len); if (cnt != tlen || tlen != len) nfsm_adj(mb, cnt - tlen, tlen - len); if (v3) { *tl++ = txdr_unsigned(len); if (len < reqlen) *tl++ = nfs_true; else *tl++ = nfs_false; } *tl = txdr_unsigned(len); nfsm_srvdone; } /* * nfs write service */ int nfsrv_write(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct iovec *ivp; register int i, cnt; register struct mbuf *mp; register struct nfs_fattr *fp; struct iovec *iv; struct vattr va, forat; register struct vattr *vap = &va; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, retlen, zeroing, adjust; int stable = NFSV3WRITE_FILESYNC; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; struct uio io, *uiop = &io; off_t off; u_quad_t frev; if (mrep == NULL) { *mrq = NULL; return (0); } fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &off); tl += 3; stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) stable = NFSV3WRITE_UNSTABLE; } retlen = len = fxdr_unsigned(long, *tl); cnt = i = 0; /* * For NFS Version 2, it is not obvious what a write of zero length * should do, but I might as well be consistent with Version 3, * which is to return ok so long as there are no permission problems. 
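* (NFS Version 3 makes this explicit: a write with a count of 0 succeeds * and returns a count of 0, barring permission problems.)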
*/ if (len > 0) { zeroing = 1; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else if (mp->m_len > 0) { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } if (mp->m_len > 0) cnt++; } mp = mp->m_next; } } if (len > NFS_MAXDATA || len < 0 || i < len) { error = EIO; nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); return (0); } if (len > 0) { MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = iv = ivp; uiop->uio_iovcnt = cnt; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } /* * XXX * The IO_METASYNC flag indicates that all metadata (and not just * enough to ensure data integrity) must be written to stable storage * synchronously. * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.) */ if (stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_resid = len; uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = off; error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; FREE((caddr_t)iv, M_TEMP); } aftat_ret = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); if (!error) error = aftat_ret; nfsm_reply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3)); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, vap); if (error) return (0); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(retlen); if (stable == NFSV3WRITE_UNSTABLE) *tl++ = txdr_unsigned(stable); else *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC); /* * Actually, there is no need to txdr these fields, * but it may make the values more human-readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } nfsm_srvdone; } /* * NFS write service with write gathering support. Called when * nfsrvw_procrastinate > 0. * See: Chet Juszczak, "Improving the Write Performance of an NFS Server", * in Proc. of the Winter 1994 Usenix Conference, pp. 247-259, San Francisco, * Jan. 1994.
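* The idea: each write is procrastinated for nfsrvw_procrastinate * microseconds, giving contiguous writes of the same file from the same * client a chance to arrive and be coalesced into a single VOP_WRITE().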
*/ int nfsrv_writegather(ndp, slp, procp, mrq) struct nfsrv_descript **ndp; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { register struct iovec *ivp; register struct mbuf *mp; register struct nfsrv_descript *wp, *nfsd, *owp, *swp; register struct nfs_fattr *fp; register int i; struct iovec *iov; struct nfsrvw_delayhash *wpp; struct ucred *cred; struct vattr va, forat; register u_long *tl; register long t1; caddr_t bpos, dpos; int error = 0, rdonly, cache, len, forat_ret = 1; int ioflags, aftat_ret = 1, s, adjust, v3, zeroing; char *cp2; struct mbuf *mb, *mb2, *mreq, *mrep, *md; struct vnode *vp; struct uio io, *uiop = &io; u_quad_t frev, cur_usec; #ifndef nolint i = 0; len = 0; #endif *mrq = NULL; if (*ndp) { nfsd = *ndp; *ndp = NULL; mrep = nfsd->nd_mrep; md = nfsd->nd_md; dpos = nfsd->nd_dpos; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); LIST_INIT(&nfsd->nd_coalesce); nfsd->nd_mreq = NULL; nfsd->nd_stable = NFSV3WRITE_FILESYNC; cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec; nfsd->nd_time = cur_usec + nfsrvw_procrastinate; /* * Now, get the write header.. */ nfsm_srvmtofh(&nfsd->nd_fh); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &nfsd->nd_off); tl += 3; nfsd->nd_stable = fxdr_unsigned(int, *tl++); } else { nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED); nfsd->nd_off = (off_t)fxdr_unsigned(u_long, *++tl); tl += 2; if (nfs_async) nfsd->nd_stable = NFSV3WRITE_UNSTABLE; } len = fxdr_unsigned(long, *tl); nfsd->nd_len = len; nfsd->nd_eoff = nfsd->nd_off + len; /* * Trim the header out of the mbuf list and trim off any trailing * junk so that the mbuf list has only the write data. */ zeroing = 1; i = 0; mp = mrep; while (mp) { if (mp == md) { zeroing = 0; adjust = dpos - mtod(mp, caddr_t); mp->m_len -= adjust; if (mp->m_len > 0 && adjust > 0) NFSMADV(mp, adjust); } if (zeroing) mp->m_len = 0; else { i += mp->m_len; if (i > len) { mp->m_len -= (i - len); zeroing = 1; } } mp = mp->m_next; } if (len > NFS_MAXDATA || len < 0 || i < len) { nfsmout: m_freem(mrep); error = EIO; nfsm_writereply(2 * NFSX_UNSIGNED, v3); if (v3) nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsd->nd_mreq = mreq; nfsd->nd_mrep = NULL; nfsd->nd_time = 0; } /* * Add this entry to the hash and time queues. */ s = splsoftclock(); owp = NULL; wp = slp->ns_tq.lh_first; while (wp && wp->nd_time < nfsd->nd_time) { owp = wp; wp = wp->nd_tq.le_next; } if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_tq); } else { LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } if (nfsd->nd_mrep) { wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data); owp = NULL; wp = wpp->lh_first; while (wp && bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } while (wp && wp->nd_off < nfsd->nd_off && !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) { owp = wp; wp = wp->nd_hash.le_next; } if (owp) { LIST_INSERT_AFTER(owp, nfsd, nd_hash); /* * Search the hash list for overlapping entries and * coalesce. */ for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) { wp = nfsd->nd_hash.le_next; if (NFSW_SAMECRED(owp, nfsd)) nfsrvw_coalesce(owp, nfsd); } } else { LIST_INSERT_HEAD(wpp, nfsd, nd_hash); } } splx(s); } /* * Now, do VOP_WRITE()s for any one(s) that need to be done now * and generate the associated reply mbuf list(s). 
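* Descriptors whose nd_time has expired are pulled off the timer queue, * written once, and a reply is then built for each request that was * coalesced into them.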
*/ loop1: cur_usec = (u_quad_t)time.tv_sec * 1000000 + (u_quad_t)time.tv_usec; s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) { owp = nfsd->nd_tq.le_next; if (nfsd->nd_time > cur_usec) break; if (nfsd->nd_mreq) continue; LIST_REMOVE(nfsd, nd_tq); LIST_REMOVE(nfsd, nd_hash); splx(s); mrep = nfsd->nd_mrep; nfsd->nd_mrep = NULL; cred = &nfsd->nd_cr; v3 = (nfsd->nd_flag & ND_NFSV3); forat_ret = aftat_ret = 1; error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &vp, cred, slp, nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH)); if (!error) { if (v3) forat_ret = VOP_GETATTR(vp, &forat, cred, procp); if (vp->v_type != VREG) { if (v3) error = EINVAL; else error = (vp->v_type == VDIR) ? EISDIR : EACCES; } } else vp = NULL; if (!error) { nqsrv_getl(vp, ND_WRITE); error = nfsrv_access(vp, VWRITE, cred, rdonly, procp); } if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE) ioflags = IO_NODELOCKED; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC) ioflags = (IO_SYNC | IO_NODELOCKED); else ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED); uiop->uio_rw = UIO_WRITE; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = (struct proc *)0; uiop->uio_offset = nfsd->nd_off; uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off; if (uiop->uio_resid > 0) { mp = mrep; i = 0; while (mp) { if (mp->m_len > 0) i++; mp = mp->m_next; } uiop->uio_iovcnt = i; MALLOC(iov, struct iovec *, i * sizeof (struct iovec), M_TEMP, M_WAITOK); uiop->uio_iov = ivp = iov; mp = mrep; while (mp) { if (mp->m_len > 0) { ivp->iov_base = mtod(mp, caddr_t); ivp->iov_len = mp->m_len; ivp++; } mp = mp->m_next; } if (!error) { error = VOP_WRITE(vp, uiop, ioflags, cred); nfsstats.srvvop_writes++; } FREE((caddr_t)iov, M_TEMP); } m_freem(mrep); if (vp) { aftat_ret = VOP_GETATTR(vp, &va, cred, procp); - nfsrv_vput(vp); + vput(vp); } /* * Loop around generating replies for all write rpcs that have * now been completed. */ swp = nfsd; do { if (error) { nfsm_writereply(NFSX_WCCDATA(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); } } else { nfsm_writereply(NFSX_PREOPATTR(v3) + NFSX_POSTOPORFATTR(v3) + 2 * NFSX_UNSIGNED + NFSX_WRITEVERF(v3), v3); if (v3) { nfsm_srvwcc_data(forat_ret, &forat, aftat_ret, &va); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(nfsd->nd_len); *tl++ = txdr_unsigned(swp->nd_stable); /* * Actually, there is no need to txdr these fields, * but it may make the values more human readable, * for debugging purposes. */ *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else { nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(&va, fp); } } nfsd->nd_mreq = mreq; if (nfsd->nd_mrep) panic("nfsrv_write: nd_mrep not free"); /* * Done. Put it at the head of the timer queue so that * the final phase can return the reply. */ s = splsoftclock(); if (nfsd != swp) { nfsd->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq); } nfsd = swp->nd_coalesce.lh_first; if (nfsd) { LIST_REMOVE(nfsd, nd_tq); } splx(s); } while (nfsd); s = splsoftclock(); swp->nd_time = 0; LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq); splx(s); goto loop1; } splx(s); /* * Search for a reply to return. */ s = splsoftclock(); for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) if (nfsd->nd_mreq) { LIST_REMOVE(nfsd, nd_tq); *mrq = nfsd->nd_mreq; *ndp = nfsd; break; } splx(s); return (0); } /* * Coalesce the write request nfsd into owp. 
To do this we must: * - remove nfsd from the queues * - merge nfsd->nd_mrep into owp->nd_mrep * - update the nd_eoff and nd_stable for owp * - put nfsd on owp's nd_coalesce list * NB: Must be called at splsoftclock(). */ static void nfsrvw_coalesce(owp, nfsd) register struct nfsrv_descript *owp; register struct nfsrv_descript *nfsd; { register int overlap; register struct mbuf *mp; LIST_REMOVE(nfsd, nd_hash); LIST_REMOVE(nfsd, nd_tq); if (owp->nd_eoff < nfsd->nd_eoff) { overlap = owp->nd_eoff - nfsd->nd_off; if (overlap < 0) panic("nfsrv_coalesce: bad off"); if (overlap > 0) m_adj(nfsd->nd_mrep, overlap); mp = owp->nd_mrep; while (mp->m_next) mp = mp->m_next; mp->m_next = nfsd->nd_mrep; owp->nd_eoff = nfsd->nd_eoff; } else m_freem(nfsd->nd_mrep); nfsd->nd_mrep = NULL; if (nfsd->nd_stable == NFSV3WRITE_FILESYNC) owp->nd_stable = NFSV3WRITE_FILESYNC; else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC && owp->nd_stable == NFSV3WRITE_UNSTABLE) owp->nd_stable = NFSV3WRITE_DATASYNC; LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. */ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * Copy credentials, making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } /* * nfs create service * now does a truncate to 0 length via
setattr if it already exists */ int nfsrv_create(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct nfs_fattr *fp; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfsv2_sattr *sp; register u_long *tl; struct nameidata nd; register caddr_t cp; register long t1; caddr_t bpos; int error = 0, rdev, cache, len, tsize, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3), how, exclusive_flag = 0; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev, tempsize; u_char cverf[NFSX_V3CREATEVERF]; #ifndef nolint rdev = 0; #endif nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); how = fxdr_unsigned(int, *tl); switch (how) { case NFSV3CREATE_GUARDED: if (nd.ni_vp) { error = EEXIST; break; } case NFSV3CREATE_UNCHECKED: nfsm_srvsattr(vap); break; case NFSV3CREATE_EXCLUSIVE: nfsm_dissect(cp, caddr_t, NFSX_V3CREATEVERF); bcopy(cp, cverf, NFSX_V3CREATEVERF); exclusive_flag = 1; if (nd.ni_vp == NULL) vap->va_mode = 0; break; }; vap->va_type = VREG; } else { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode)); if (vap->va_type == VNON) vap->va_type = VREG; vap->va_mode = nfstov_mode(sp->sa_mode); switch (vap->va_type) { case VREG: tsize = fxdr_unsigned(long, sp->sa_size); if (tsize != -1) vap->va_size = (u_quad_t)tsize; break; case VCHR: case VBLK: case VFIFO: rdev = fxdr_unsigned(long, sp->sa_size); break; }; } /* * Iff doesn't exist, create it * otherwise just truncate to 0 length * should I set the mode too ?? 
*/ if (nd.ni_vp == NULL) { if (vap->va_type == VREG || vap->va_type == VSOCK) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (!error) { - nfsrv_vmio(nd.ni_vp); + nfsrv_object_create(nd.ni_vp); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (exclusive_flag) { exclusive_flag = 0; VATTR_NULL(vap); bcopy(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF); error = VOP_SETATTR(nd.ni_vp, vap, cred, procp); } } } else if (vap->va_type == VCHR || vap->va_type == VBLK || vap->va_type == VFIFO) { if (vap->va_type == VCHR && rdev == 0xffffffff) vap->va_type = VFIFO; if (error = suser(cred, (u_short *)0)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); nfsm_reply(0); return (error); } else vap->va_rdev = (dev_t)rdev; nqsrv_getl(nd.ni_dvp, ND_WRITE); if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); nfsm_reply(0); } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; if (error = lookup(&nd)) { free(nd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(0); } - nfsrv_vmio(nd.ni_vp); + nfsrv_object_create(nd.ni_vp); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (nd.ni_cnd.cn_flags & ISSYMLINK) { - nfsrv_vrele(nd.ni_dvp); - nfsrv_vput(nd.ni_vp); + vrele(nd.ni_dvp); + vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; nfsm_reply(0); } } else { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); error = ENXIO; } vp = nd.ni_vp; } else { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); vp = nd.ni_vp; if (nd.ni_dvp == vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (vap->va_size != -1) { error = nfsrv_access(vp, VWRITE, cred, (nd.ni_cnd.cn_flags & RDONLY), procp); if (!error) { nqsrv_getl(vp, ND_WRITE); tempsize = vap->va_size; VATTR_NULL(vap); vap->va_size = tempsize; error = VOP_SETATTR(vp, vap, cred, procp); } if (error) - nfsrv_vput(vp); + vput(vp); } } if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); } if (v3) { if (exclusive_flag && !error && bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF)) error = EEXIST; diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_FATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); if (nd.ni_cnd.cn_nameiop) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vput(nd.ni_vp); + vput(nd.ni_vp); return (error); } 
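/*
 * Illustrative sketch, not part of the original source: how nfsrv_create()
 * above handles the NFSv3 exclusive-create verifier.  The client's 8-byte
 * verifier is stashed in the new file's atime via VOP_SETATTR(); when a
 * CREATE finds the file already present, comparing the stored atime with
 * the verifier distinguishes a harmless retransmission (match) from a
 * genuine collision (mismatch, answered with EEXIST).  The helper name is
 * hypothetical.
 */
static int
nfsrv_cverf_matches(vap, cverf)
	register struct vattr *vap;
	u_char *cverf;
{

	/* The same bcmp() check is done in the v3 reply path above. */
	return (bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF) == 0);
}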
/* * nfs v3 mknod service */ int nfsrv_mknod(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register u_long *tl; struct nameidata nd; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; u_long major, minor; enum vtype vtyp; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); if (error) { nfsm_reply(NFSX_WCCDATA(1)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vtyp = nfsv3tov_type(*tl); if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); error = NFSERR_BADTYPE; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); goto out; } VATTR_NULL(vap); nfsm_srvsattr(vap); if (vtyp == VCHR || vtyp == VBLK) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); major = fxdr_unsigned(u_long, *tl++); minor = fxdr_unsigned(u_long, *tl); vap->va_rdev = makedev(major, minor); } /* * Iff doesn't exist, create it. 
*/ if (nd.ni_vp) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); error = EEXIST; VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); goto out; } vap->va_type = vtyp; if (vtyp == VSOCK) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (!error) FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); } else { if (error = suser(cred, (u_short *)0)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); goto out; } nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = procp->p_ucred; error = lookup(&nd); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); if (error) goto out; if (nd.ni_cnd.cn_flags & ISSYMLINK) { - nfsrv_vrele(nd.ni_dvp); - nfsrv_vput(nd.ni_vp); + vrele(nd.ni_dvp); + vput(nd.ni_vp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); error = EINVAL; } } out: vp = nd.ni_vp; if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); } diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); nfsm_reply(NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) + NFSX_WCCDATA(1)); if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); nfsmout: if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); if (nd.ni_cnd.cn_nameiop) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free((caddr_t)nd.ni_cnd.cn_pnbuf, M_NAMEI); } VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vput(nd.ni_vp); + vput(nd.ni_vp); return (error); } /* * nfs remove service */ int nfsrv_remove(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint vp = (struct vnode *)0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else - nfsrv_vrele(dirp); + vrele(dirp); } if (!error) { vp = nd.ni_vp; if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0))) goto out; /* * The root of a mounted filesystem cannot be deleted. 
*/ if (vp->v_flag & VROOT) { error = EBUSY; goto out; } - vnode_pager_uncache(vp); out: if (!error) { - int deallocobj = 0; + vnode_pager_uncache(vp); nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); - if ((vp->v_flag & VVMIO) && vp->v_object) - deallocobj = 1; error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); - if (error == 0 && deallocobj) - vm_object_deallocate(vp->v_object); + } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vput(vp); + vput(nd.ni_dvp); + vput(vp); } } if (dirp && v3) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs rename service */ int nfsrv_rename(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, len2, fdirfor_ret = 1, fdiraft_ret = 1; int tdirfor_ret = 1, tdiraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct nameidata fromnd, tond; struct vnode *fvp, *tvp, *tdvp, *fdirp = (struct vnode *)0; struct vnode *tdirp = (struct vnode *)0; struct vattr fdirfor, fdiraft, tdirfor, tdiraft; nfsfh_t fnfh, tnfh; fhandle_t *ffhp, *tfhp; u_quad_t frev; uid_t saved_uid; #ifndef nolint fvp = (struct vnode *)0; #endif ffhp = &fnfh.fh_generic; tfhp = &tnfh.fh_generic; fromnd.ni_cnd.cn_nameiop = 0; tond.ni_cnd.cn_nameiop = 0; nfsm_srvmtofh(ffhp); nfsm_srvnamesiz(len); /* * Remember our original uid so that we can reset cr_uid before * the second nfs_namei() call, in case it is remapped. 
*/ saved_uid = cred->cr_uid; fromnd.ni_cnd.cn_cred = cred; fromnd.ni_cnd.cn_nameiop = DELETE; fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART; error = nfs_namei(&fromnd, ffhp, len, slp, nam, &md, &dpos, &fdirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (fdirp) { if (v3) fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor, cred, procp); else { - nfsrv_vrele(fdirp); + vrele(fdirp); fdirp = (struct vnode *)0; } } if (error) { nfsm_reply(2 * NFSX_WCCDATA(v3)); nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); if (fdirp) - nfsrv_vrele(fdirp); + vrele(fdirp); return (0); } fvp = fromnd.ni_vp; nfsm_srvmtofh(tfhp); nfsm_strsiz(len2, NFS_MAXNAMLEN); cred->cr_uid = saved_uid; tond.ni_cnd.cn_cred = cred; tond.ni_cnd.cn_nameiop = RENAME; tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART; error = nfs_namei(&tond, tfhp, len2, slp, nam, &md, &dpos, &tdirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (tdirp) { if (v3) tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor, cred, procp); else { - nfsrv_vrele(tdirp); + vrele(tdirp); tdirp = (struct vnode *)0; } } if (error) { VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - nfsrv_vrele(fromnd.ni_dvp); - nfsrv_vrele(fvp); + vrele(fromnd.ni_dvp); + vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { if (v3) error = EEXIST; else error = EISDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { if (v3) error = EEXIST; else error = ENOTDIR; goto out; } if (tvp->v_type == VDIR && tvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } } if (fvp->v_type == VDIR && fvp->v_mountedhere) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp->v_mount != tdvp->v_mount) { if (v3) error = EXDEV; else error = ENOTEMPTY; goto out; } if (fvp == tdvp) if (v3) error = EINVAL; else error = ENOTEMPTY; /* * If source is the same as the destination (that is the * same vnode with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { - int deallocobjfrom = 0, deallocobjto = 0; nqsrv_getl(fromnd.ni_dvp, ND_WRITE); nqsrv_getl(tdvp, ND_WRITE); if (tvp) { nqsrv_getl(tvp, ND_WRITE); - if ((tvp->v_flag & VVMIO) && tvp->v_object) - deallocobjto = 1; (void) vnode_pager_uncache(tvp); } - if ((fvp->v_flag & VVMIO) && fvp->v_object) - deallocobjfrom = 1; error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); - if (deallocobjfrom) - vm_object_deallocate(fvp->v_object); - if (deallocobjto) - vm_object_deallocate(tvp->v_object); - } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) - nfsrv_vrele(tdvp); + vrele(tdvp); else - nfsrv_vput(tdvp); + vput(tdvp); if (tvp) - nfsrv_vput(tvp); + vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - nfsrv_vrele(fromnd.ni_dvp); - nfsrv_vrele(fvp); + vrele(fromnd.ni_dvp); + vrele(fvp); if (error == -1) error = 0; } - nfsrv_vrele(tond.ni_startdir); + vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); out1: if (fdirp) { fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft, cred, procp); - nfsrv_vrele(fdirp); + vrele(fdirp); } if (tdirp) { tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft, cred, procp); - nfsrv_vrele(tdirp); + vrele(tdirp); } - nfsrv_vrele(fromnd.ni_startdir); + vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); nfsm_reply(2 * NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft); nfsm_srvwcc_data(tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft); } return (0); nfsmout: if (fdirp) - nfsrv_vrele(fdirp); + vrele(fdirp); if (tdirp) - nfsrv_vrele(tdirp); + vrele(tdirp); if (tond.ni_cnd.cn_nameiop) { - nfsrv_vrele(tond.ni_startdir); + vrele(tond.ni_startdir); FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI); } if (fromnd.ni_cnd.cn_nameiop) { - nfsrv_vrele(fromnd.ni_startdir); + vrele(fromnd.ni_startdir); FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); - nfsrv_vrele(fromnd.ni_dvp); - nfsrv_vrele(fvp); + vrele(fromnd.ni_dvp); + vrele(fvp); } return (error); } /* * nfs link service */ int nfsrv_link(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct nameidata nd; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, len, dirfor_ret = 1, diraft_ret = 1; int getret = 1, v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *xp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft, at; nfsfh_t nfh, dnfh; fhandle_t *fhp, *dfhp; u_quad_t frev; fhp = &nfh.fh_generic; dfhp = &dnfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvmtofh(dfhp); nfsm_srvnamesiz(len); if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0))) goto out1; nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, dfhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if 
(dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out1; xp = nd.ni_vp; if (xp != NULL) { error = EEXIST; goto out; } xp = nd.ni_dvp; if (vp->v_mount != xp->v_mount) error = EXDEV; out: if (!error) { nqsrv_getl(vp, ND_WRITE); nqsrv_getl(xp, ND_WRITE); #if defined(__NetBSD__) || defined(__FreeBSD__) error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); #else error = VOP_LINK(vp, nd.ni_dvp, &nd.ni_cnd); #endif } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vrele(nd.ni_vp); + vrele(nd.ni_vp); } out1: if (v3) getret = VOP_GETATTR(vp, &at, cred, procp); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } - nfsrv_vrele(vp); + vrele(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs symbolic link service */ int nfsrv_symlink(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; - struct vnode *ovp; struct nameidata nd; register struct vattr *vap = &va; register u_long *tl; register long t1; struct nfsv2_sattr *sp; char *bpos, *pathcp = (char *)0, *cp2; struct uio io; struct iovec iv; int error = 0, cache, len, len2, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); struct mbuf *mb, *mreq, *mb2; struct vnode *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; - int deallocobj = 0; nd.ni_cnd.cn_nameiop = 0; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | SAVESTART; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) goto out; VATTR_NULL(vap); if (v3) nfsm_srvsattr(vap); nfsm_strsiz(len2, NFS_MAXPATHLEN); MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); iv.iov_base = pathcp; iv.iov_len = len2; io.uio_resid = len2; io.uio_offset = 0; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; nfsm_mtouio(&io, len2); if (!v3) { nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_V2SATTR); vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode); } *(pathcp + len2) = '\0'; if (nd.ni_vp) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vrele(nd.ni_vp); + vput(nd.ni_dvp); + vrele(nd.ni_vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); - if ((ovp = nd.ni_vp) && (ovp->v_flag & VVMIO) && ovp->v_object) - deallocobj = 1; error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp); - if (error == 0 && deallocobj) - vm_object_deallocate(ovp->v_object); if (error) - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); else { 
if (v3) { nd.ni_cnd.cn_nameiop = LOOKUP; nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART | FOLLOW); nd.ni_cnd.cn_flags |= (NOFOLLOW | LOCKLEAF); nd.ni_cnd.cn_proc = procp; nd.ni_cnd.cn_cred = cred; error = lookup(&nd); if (!error) { bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = nd.ni_vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(nd.ni_vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(nd.ni_vp, vap, cred, procp); - nfsrv_vput(nd.ni_vp); + vput(nd.ni_vp); } } else - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI); } out: if (pathcp) FREE(pathcp, M_TEMP); if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) + NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } return (0); nfsmout: if (nd.ni_cnd.cn_nameiop) { - nfsrv_vrele(nd.ni_startdir); + vrele(nd.ni_startdir); free(nd.ni_cnd.cn_pnbuf, M_NAMEI); } if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vrele(nd.ni_vp); + vrele(nd.ni_vp); if (pathcp) FREE(pathcp, M_TEMP); return (error); } /* * nfs mkdir service */ int nfsrv_mkdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr va, dirfor, diraft; register struct vattr *vap = &va; register struct nfs_fattr *fp; struct nameidata nd; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } VATTR_NULL(vap); if (v3) { nfsm_srvsattr(vap); } else { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); vap->va_mode = nfstov_mode(*tl++); } vap->va_type = VDIR; vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vrele(vp); + vput(nd.ni_dvp); + vrele(vp); error = EEXIST; goto out; } nqsrv_getl(nd.ni_dvp, ND_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap); if (!error) { vp = nd.ni_vp; bzero((caddr_t)fhp, sizeof(nfh)); fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fhp->fh_fid); if (!error) error = VOP_GETATTR(vp, vap, cred, procp); - nfsrv_vput(vp); + vput(vp); } out: if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_SRVFH(v3) + NFSX_POSTOPATTR(v3) 
+ NFSX_WCCDATA(v3)); if (v3) { if (!error) { nfsm_srvpostop_fh(fhp); nfsm_srvpostop_attr(0, vap); } nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); } else { nfsm_srvfhtom(fhp, v3); nfsm_build(fp, struct nfs_fattr *, NFSX_V2FATTR); nfsm_srvfillattr(vap, fp); } return (0); nfsmout: if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); + vput(nd.ni_dvp); if (nd.ni_vp) - nfsrv_vrele(nd.ni_vp); + vrele(nd.ni_vp); return (error); } /* * nfs rmdir service */ int nfsrv_rmdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register long t1; caddr_t bpos; int error = 0, cache, len, dirfor_ret = 1, diraft_ret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mreq; struct vnode *vp, *dirp = (struct vnode *)0; struct vattr dirfor, diraft; nfsfh_t nfh; fhandle_t *fhp; struct nameidata nd; u_quad_t frev; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_srvnamesiz(len); nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameiop = DELETE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF; error = nfs_namei(&nd, fhp, len, slp, nam, &md, &dpos, &dirp, procp, (nfsd->nd_flag & ND_KERBAUTH)); if (dirp) { if (v3) dirfor_ret = VOP_GETATTR(dirp, &dirfor, cred, procp); else { - nfsrv_vrele(dirp); + vrele(dirp); dirp = (struct vnode *)0; } } if (error) { nfsm_reply(NFSX_WCCDATA(v3)); nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); if (dirp) - nfsrv_vrele(dirp); + vrele(dirp); return (0); } vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { nqsrv_getl(nd.ni_dvp, ND_WRITE); nqsrv_getl(vp, ND_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) - nfsrv_vrele(nd.ni_dvp); + vrele(nd.ni_dvp); else - nfsrv_vput(nd.ni_dvp); - nfsrv_vput(vp); + vput(nd.ni_dvp); + vput(vp); } if (dirp) { diraft_ret = VOP_GETATTR(dirp, &diraft, cred, procp); - nfsrv_vrele(dirp); + vrele(dirp); } nfsm_reply(NFSX_WCCDATA(v3)); if (v3) { nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft); return (0); } nfsm_srvdone; } /* * nfs readdir service * - mallocs what it thinks is enough to read * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR * - calls VOP_READDIR() * - loops around building the reply * if the output generated exceeds count break out of loop * The nfsm_clget macro is used here so that the reply will be packed * tightly in mbuf clusters. * - it only knows that it has encountered eof when the VOP_READDIR() * reads nothing * - as such one readdir rpc will return eof false although you are there * and then the next will return eof * - it trims out records with d_fileno == 0 * this doesn't matter for Unix clients, but they might confuse clients * for other os'. * NB: It is tempting to set eof to true if the VOP_READDIR() reads less * than requested, but this may not apply to all filesystems. 
For * example, client NFS does not { although it is never remote mounted * anyhow } * The alternate call nfsrv_readdirplus() does lookups as well. * PS: The NFS protocol spec. does not clarify what the "count" byte * argument is a count of.. just name strings and file id's or the * entire reply rpc or ... * I tried just file name and id sizes and it confused the Sun client, * so I am using the full rpc size now. The "paranoia.." comment refers * to including the status longwords that are not a part of the dir. * "entry" structures, but are in the rpc. */ struct flrep { nfsuint64 fl_off; u_long fl_postopok; u_long fl_fattr[NFSX_V3FATTR / sizeof (u_long)]; u_long fl_fhok; u_long fl_fhsize; u_long fl_nfh[NFSX_V3FH / sizeof (u_long)]; }; int nfsrv_readdir(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct uio io; struct iovec iv; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, ncookies; int v3 = (nfsd->nd_flag & ND_NFSV3); u_quad_t frev, off, toff, verf; u_int *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (v3) { nfsm_dissect(tl, u_long *, 5 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); toff = fxdr_unsigned(u_quad_t, *tl++); } off = toff; cnt = fxdr_unsigned(int, *tl); siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } nqsrv_getl(vp, ND_READ); if (v3) { error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; } if (!error) error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); #ifdef __NetBSD__ ncookies = siz / (5 * NFSX_UNSIGNED); /*7 for V3, but it's an est. 
so*/ MALLOC(cookies, u_long *, ncookies * sizeof (u_long *), M_TEMP, M_WAITOK); #endif again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; VOP_LOCK(vp); #ifndef __NetBSD__ if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); #else error = VOP_READDIR(vp, &io, cred, &eofflag, cookies, ncookies); #endif off = (off_t)io.uio_offset; if (!cookies && !error) error = NFSERR_PERM; if (v3) { getret = VOP_GETATTR(vp, &at, cred, procp); if (!error) error = getret; } VOP_UNLOCK(vp); if (error) { - nfsrv_vrele(vp); + vrele(vp); free((caddr_t)rbuf, M_TEMP); if (cookies) free((caddr_t)cookies, M_TEMP); nfsm_reply(NFSX_POSTOPATTR(v3)); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { - nfsrv_vrele(vp); + vrele(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + 2 * NFSX_UNSIGNED); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; } else nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. * If so, go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; #ifdef __FreeBSD__ /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { #else while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { #endif cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */ nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_COOKIEVERF(v3) + siz); if (v3) { nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); } mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; len += (4 * NFSX_UNSIGNED + nlen + rem); if (v3) len += 2 * NFSX_UNSIGNED; if (len > cnt) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry.
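* Each entry goes out as: an entry-follows flag, the file id, the name * length, the name null-padded to a 4-byte boundary, and the cookie; for * V3 the file id and cookie are 64 bits, hence the extra zero words.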
*/ nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; if (v3) { nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; } nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp+xfer) > be) tsiz = be-bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; nfsm_clget; /* Finish off the record */ if (v3) { *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; } *tl = txdr_unsigned(*cookiep); bp += NFSX_UNSIGNED; } cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } - nfsrv_vrele(vp); + vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)rbuf, M_TEMP); FREE((caddr_t)cookies, M_TEMP); nfsm_srvdone; } int nfsrv_readdirplus(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register char *bp, *be; register struct mbuf *mp; register struct dirent *dp; register caddr_t cp; register u_long *tl; register long t1; caddr_t bpos; struct mbuf *mb, *mb2, *mreq, *mp2; char *cpos, *cend, *cp2, *rbuf; struct vnode *vp, *nvp; struct flrep fl; nfsfh_t nfh; fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh; struct uio io; struct iovec iv; struct vattr va, at, *vap = &va; struct nfs_fattr *fp; int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1; int siz, cnt, fullsiz, eofflag, rdonly, cache, dirlen, ncookies; u_quad_t frev, off, toff, verf; u_int *cookies = NULL, *cookiep; fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 6 * NFSX_UNSIGNED); fxdr_hyper(tl, &toff); tl += 2; fxdr_hyper(tl, &verf); tl += 2; siz = fxdr_unsigned(int, *tl++); cnt = fxdr_unsigned(int, *tl); off = toff; siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1)); xfer = NFS_SRVMAXDATA(nfsd); if (siz > xfer) siz = xfer; fullsiz = siz; if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = getret = VOP_GETATTR(vp, &at, cred, procp); if (!error && toff && verf != at.va_filerev) error = NFSERR_BAD_COOKIE; if (!error) { nqsrv_getl(vp, ND_READ); error = nfsrv_access(vp, VEXEC, cred, rdonly, procp); } if (error) { - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } VOP_UNLOCK(vp); MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK); #ifdef __NetBSD__ ncookies = siz / (7 * NFSX_UNSIGNED); MALLOC(cookies, u_long *, ncookies * sizeof (u_long *), M_TEMP, M_WAITOK); #endif again: iv.iov_base = rbuf; iv.iov_len = fullsiz; io.uio_iov = &iv; io.uio_iovcnt = 1; io.uio_offset = (off_t)off; io.uio_resid = fullsiz; io.uio_segflg = UIO_SYSSPACE; io.uio_rw = UIO_READ; io.uio_procp = (struct proc *)0; eofflag = 0; VOP_LOCK(vp); #ifndef __NetBSD__ if (cookies) { free((caddr_t)cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies); #else error = VOP_READDIR(vp, &io, cred, &eofflag, cookies, ncookies); #endif off = 
(u_quad_t)io.uio_offset; getret = VOP_GETATTR(vp, &at, cred, procp); VOP_UNLOCK(vp); if (!cookies && !error) error = NFSERR_PERM; if (!error) error = getret; if (error) { - nfsrv_vrele(vp); + vrele(vp); if (cookies) free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } if (io.uio_resid) { siz -= io.uio_resid; /* * If nothing read, return eof * rpc reply */ if (siz == 0) { - nfsrv_vrele(vp); + vrele(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); tl += 2; *tl++ = nfs_false; *tl = nfs_true; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); return (0); } } /* * Check for degenerate cases of nothing useful read. * If so go try again */ cpos = rbuf; cend = rbuf + siz; dp = (struct dirent *)cpos; cookiep = cookies; #ifdef __FreeBSD__ /* * For some reason FreeBSD's ufs_readdir() chooses to back the * directory offset up to a block boundary, so it is necessary to * skip over the records that precede the requested offset. This * requires the assumption that file offset cookies monotonically * increase. */ while (cpos < cend && ncookies > 0 && (dp->d_fileno == 0 || ((u_quad_t)(*cookiep)) <= toff)) { #else while (dp->d_fileno == 0 && cpos < cend && ncookies > 0) { #endif cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } if (cpos >= cend || ncookies == 0) { toff = off; siz = fullsiz; goto again; } /* * Probe one of the directory entries to see if the filesystem * supports VGET. */ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp) == EOPNOTSUPP) { error = NFSERR_NOTSUPP; - nfsrv_vrele(vp); + vrele(vp); free((caddr_t)cookies, M_TEMP); free((caddr_t)rbuf, M_TEMP); nfsm_reply(NFSX_V3POSTOPATTR); nfsm_srvpostop_attr(getret, &at); return (0); } dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF + 2 * NFSX_UNSIGNED; nfsm_reply(cnt); nfsm_srvpostop_attr(getret, &at); nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); txdr_hyper(&at.va_filerev, tl); mp = mp2 = mb; bp = bpos; be = bp + M_TRAILINGSPACE(mp); /* Loop through the records and build reply */ while (cpos < cend && ncookies > 0) { if (dp->d_fileno != 0) { nlen = dp->d_namlen; rem = nfsm_rndup(nlen)-nlen; /* * For readdir_and_lookup get the vnode using * the file number. */ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp)) goto invalid; bzero((caddr_t)nfhp, NFSX_V3FH); nfhp->fh_fsid = nvp->v_mount->mnt_stat.f_fsid; if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) { vput(nvp); goto invalid; } if (VOP_GETATTR(nvp, vap, cred, procp)) { vput(nvp); goto invalid; } vput(nvp); /* * If either the dircount or maxcount will be * exceeded, get out now. Both of these lengths * are calculated conservatively, including all * XDR overheads. */ len += (7 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH + NFSX_V3POSTOPATTR); dirlen += (6 * NFSX_UNSIGNED + nlen + rem); if (len > cnt || dirlen > fullsiz) { eofflag = 0; break; } /* * Build the directory record xdr from * the dirent entry.
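* Roughly, a READDIRPLUS entry is a READDIR entry with a post-op * attribute block and a file handle appended: the name goes out first, * then the flrep struct above (cookie, attributes, handle) is copied * out behind it in one pass.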
*/ fp = (struct nfs_fattr *)&fl.fl_fattr; nfsm_srvfillattr(vap, fp); fl.fl_fhsize = txdr_unsigned(NFSX_V3FH); fl.fl_fhok = nfs_true; fl.fl_postopok = nfs_true; fl.fl_off.nfsuquad[0] = 0; fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep); nfsm_clget; *tl = nfs_true; bp += NFSX_UNSIGNED; nfsm_clget; *tl = 0; bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(dp->d_fileno); bp += NFSX_UNSIGNED; nfsm_clget; *tl = txdr_unsigned(nlen); bp += NFSX_UNSIGNED; /* And loop around copying the name */ xfer = nlen; cp = dp->d_name; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } /* And null pad to a long boundary */ for (i = 0; i < rem; i++) *bp++ = '\0'; /* * Now copy the flrep structure out. */ xfer = sizeof (struct flrep); cp = (caddr_t)&fl; while (xfer > 0) { nfsm_clget; if ((bp + xfer) > be) tsiz = be - bp; else tsiz = xfer; bcopy(cp, bp, tsiz); bp += tsiz; xfer -= tsiz; if (xfer > 0) cp += tsiz; } } invalid: cpos += dp->d_reclen; dp = (struct dirent *)cpos; cookiep++; ncookies--; } - nfsrv_vrele(vp); + vrele(vp); nfsm_clget; *tl = nfs_false; bp += NFSX_UNSIGNED; nfsm_clget; if (eofflag) *tl = nfs_true; else *tl = nfs_false; bp += NFSX_UNSIGNED; if (mp != mb) { if (bp < be) mp->m_len = bp - mtod(mp, caddr_t); } else mp->m_len += bp - bpos; FREE((caddr_t)cookies, M_TEMP); FREE((caddr_t)rbuf, M_TEMP); nfsm_srvdone; } /* * nfs commit service */ int nfsrv_commit(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; struct vattr bfor, aft; struct vnode *vp; nfsfh_t nfh; fhandle_t *fhp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt, cache; char *cp2; struct mbuf *mb, *mb2, *mreq; u_quad_t frev, off; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); /* * XXX At this time VOP_FSYNC() does not accept offset and byte * count parameters, so these arguments are useless (someday maybe). 
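* If it ever does, the call below could presumably become something * like VOP_FSYNC(vp, cred, MNT_WAIT, off, cnt, procp), flushing just * the byte range named in the COMMIT arguments rather than the whole * file.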
*/ fxdr_hyper(tl, &off); tl += 2; cnt = fxdr_unsigned(int, *tl); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(2 * NFSX_UNSIGNED); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); return (0); } for_ret = VOP_GETATTR(vp, &bfor, cred, procp); error = VOP_FSYNC(vp, cred, MNT_WAIT, procp); aft_ret = VOP_GETATTR(vp, &aft, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3WCCDATA + NFSX_V3WRITEVERF); nfsm_srvwcc_data(for_ret, &bfor, aft_ret, &aft); if (!error) { nfsm_build(tl, u_long *, NFSX_V3WRITEVERF); *tl++ = txdr_unsigned(boottime.tv_sec); *tl = txdr_unsigned(boottime.tv_usec); } else return (0); nfsm_srvdone; } /* * nfs statfs service */ int nfsrv_statfs(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register struct statfs *sf; register struct nfs_statfs *sfp; register u_long *tl; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1; int v3 = (nfsd->nd_flag & ND_NFSV3); char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; struct statfs statfs; u_quad_t frev, tval; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } sf = &statfs; error = VFS_STATFS(vp->v_mount, sf, procp); getret = VOP_GETATTR(vp, &at, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_POSTOPATTR(v3) + NFSX_STATFS(v3)); if (v3) nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(sfp, struct nfs_statfs *, NFSX_STATFS(v3)); if (v3) { tval = (u_quad_t)sf->f_blocks; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_tbytes); tval = (u_quad_t)sf->f_bfree; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_fbytes); tval = (u_quad_t)sf->f_bavail; tval *= (u_quad_t)sf->f_bsize; txdr_hyper(&tval, &sfp->sf_abytes); sfp->sf_tfiles.nfsuquad[0] = 0; sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files); sfp->sf_ffiles.nfsuquad[0] = 0; sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_afiles.nfsuquad[0] = 0; sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree); sfp->sf_invarsec = 0; } else { sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA); sfp->sf_bsize = txdr_unsigned(sf->f_bsize); sfp->sf_blocks = txdr_unsigned(sf->f_blocks); sfp->sf_bfree = txdr_unsigned(sf->f_bfree); sfp->sf_bavail = txdr_unsigned(sf->f_bavail); } nfsm_srvdone; } /* * nfs fsinfo service */ int nfsrv_fsinfo(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_fsinfo *sip; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, pref; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); 
return (0); } getret = VOP_GETATTR(vp, &at, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3FSINFO); nfsm_srvpostop_attr(getret, &at); nfsm_build(sip, struct nfsv3_fsinfo *, NFSX_V3FSINFO); /* * XXX * There should be file system VFS OP(s) to get this information. * For now, assume ufs. */ if (slp->ns_so->so_type == SOCK_DGRAM) pref = NFS_MAXDGRAMDATA; else pref = NFS_MAXDATA; sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_rtpref = txdr_unsigned(pref); sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA); sip->fs_wtpref = txdr_unsigned(pref); sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE); sip->fs_dtpref = txdr_unsigned(pref); sip->fs_maxfilesize.nfsuquad[0] = 0xffffffff; sip->fs_maxfilesize.nfsuquad[1] = 0xffffffff; sip->fs_timedelta.nfsv3_sec = 0; sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1); sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK | NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS | NFSV3FSINFO_CANSETTIME); nfsm_srvdone; } /* * nfs pathconf service */ int nfsrv_pathconf(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep, *md = nfsd->nd_md; struct mbuf *nam = nfsd->nd_nam; caddr_t dpos = nfsd->nd_dpos; struct ucred *cred = &nfsd->nd_cr; register u_long *tl; register struct nfsv3_pathconf *pc; register long t1; caddr_t bpos; int error = 0, rdonly, cache, getret = 1, linkmax, namemax; int chownres, notrunc; char *cp2; struct mbuf *mb, *mb2, *mreq; struct vnode *vp; struct vattr at; nfsfh_t nfh; fhandle_t *fhp; u_quad_t frev; #ifndef nolint cache = 0; #endif fhp = &nfh.fh_generic; nfsm_srvmtofh(fhp); if (error = nfsrv_fhtovp(fhp, 1, &vp, cred, slp, nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH))) { nfsm_reply(NFSX_UNSIGNED); nfsm_srvpostop_attr(getret, &at); return (0); } error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax); if (!error) error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax); if (!error) error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres); if (!error) error = VOP_PATHCONF(vp, _PC_NO_TRUNC, ¬runc); getret = VOP_GETATTR(vp, &at, cred, procp); - nfsrv_vput(vp); + vput(vp); nfsm_reply(NFSX_V3POSTOPATTR + NFSX_V3PATHCONF); nfsm_srvpostop_attr(getret, &at); if (error) return (0); nfsm_build(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF); pc->pc_linkmax = txdr_unsigned(linkmax); pc->pc_namemax = txdr_unsigned(namemax); pc->pc_notrunc = txdr_unsigned(notrunc); pc->pc_chownrestricted = txdr_unsigned(chownres); /* * These should probably be supported by VOP_PATHCONF(), but * until msdosfs is exportable (why would you want to?), the * Unix defaults should be ok. 
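* (Supporting them would presumably just mean two more _PC_ style * names queried like the four calls above; the hardwired answers * below describe a case-sensitive, case-preserving Unix filesystem.)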
*/ pc->pc_caseinsensitive = nfs_false; pc->pc_casepreserving = nfs_true; nfsm_srvdone; } /* * Null operation, used by clients to ping server */ /* ARGSUSED */ int nfsrv_null(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error = NFSERR_RETVOID, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif nfsm_reply(0); return (0); } /* * No operation, used for obsolete procedures */ /* ARGSUSED */ int nfsrv_noop(nfsd, slp, procp, mrq) struct nfsrv_descript *nfsd; struct nfssvc_sock *slp; struct proc *procp; struct mbuf **mrq; { struct mbuf *mrep = nfsd->nd_mrep; caddr_t bpos; int error, cache; struct mbuf *mb, *mreq; u_quad_t frev; #ifndef nolint cache = 0; #endif if (nfsd->nd_repstat) error = nfsd->nd_repstat; else error = EPROCUNAVAIL; nfsm_reply(0); return (0); } /* * Perform access checking for vnodes obtained from file handles that would * refer to files already opened by a Unix client. You cannot just use * vn_writechk() and VOP_ACCESS() for two reasons. * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case * 2 - The owner is to be given access irrespective of mode bits so that * processes that chmod after opening a file don't break. I don't like * this because it opens a security hole, but since the nfs server opens * a security hole the size of a barn door anyhow, what the heck. */ static int nfsrv_access(vp, flags, cred, rdonly, p) register struct vnode *vp; int flags; register struct ucred *cred; int rdonly; struct proc *p; { struct vattr vattr; int error; if (flags & VWRITE) { /* Just vn_writechk() changed to check rdonly */ /* * Disallow write attempts on read-only file systems; * unless the file is a socket or a block or character * device resident on the file system. */ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If there's shared text associated with * the inode, we can't allow writing. */ if (vp->v_flag & VTEXT) return (ETXTBSY); } if (error = VOP_GETATTR(vp, &vattr, cred, p)) return (error); if ((error = VOP_ACCESS(vp, flags, cred, p)) && cred->cr_uid != vattr.va_uid) return (error); return (0); } #endif /* NFS_NOSERVER */ Index: head/sys/nfsserver/nfs_srvsubs.c =================================================================== --- head/sys/nfsserver/nfs_srvsubs.c (revision 17760) +++ head/sys/nfsserver/nfs_srvsubs.c (revision 17761) @@ -1,1992 +1,1949 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.30 1996/06/23 17:19:25 bde Exp $ + * $Id: nfs_subs.c,v 1.31 1996/07/16 10:19:44 dfr Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #ifdef VFS_LKM #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_long nfs_xdrneg1; u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_long nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_long nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; #ifndef NFS_NOSERVER /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. 
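* For example, an incoming V2 request with procedure number * NFSV2PROC_STATFS indexes this table and is dispatched as * NFSPROC_FSSTAT; V2-only procedures with no generic equivalent * (ROOT, WRITECACHE) map to NFSPROC_NOOP.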
*/ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
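* So, roughly, mapping an errno for a given RPC means scanning its * list from the second entry on: return the error if it is listed, * otherwise fall back to the first (default) entry; the increasing * order lets the scan stop at the first larger value (see * nfsrv_errmap() below).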
*/ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { 
NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; #ifdef VFS_LKM struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); #endif LIST_HEAD(nfsnodehashhead, nfsnode); /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_long *xidp; { register struct mbuf *mb; register u_long *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; struct timeval tv; static u_long base; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. 
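* The eight words built here are, in order: xid, CALL, the RPC * version, program number, program version, procedure number, and * then the auth flavor and length that introduce the credentials.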
*/ nfsm_build(tl, u_long *, 8 * NFSX_UNSIGNED); /* * derive initial xid from system time * XXX time is invalid if root not yet mounted */ if (!base && (rootvp)) { microtime(&tv); base = tv.tv_sec << 12; nfs_xid = base; } /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_long *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... */ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. 
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can only handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ?
mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... */ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; char *cp; long siz; { register struct mbuf *m1 = 0, *m2; long left, xfer, len, tlen; u_long *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_long *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_long *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init() { register int i; /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfsnode) > NFS_NODEALLOC) { printf("struct nfsnode bloated (> %dbytes)\n", NFS_NODEALLOC); printf("Try reducing NFS_SMALLFH\n"); } if (sizeof (struct nfsmount) > NFS_MNTALLOC) { printf("struct nfsmount bloated (> %dbytes)\n", NFS_MNTALLOC); printf("Try reducing NFS_MUIDHASHSIZ\n"); } if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) nfs_iodwant[i] = (struct proc *)0; TAILQ_INIT(&nfs_bufq); nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. 
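* The lease clock starts at boottime pushed out by the maximum lease * term plus the clock skew and write slack, so any lease granted * before a reboot should have expired before the server answers * again.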
*/ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); #ifdef __FreeBSD__ /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. */ #ifndef NFS_NOSERVER lease_check = nfs_lease_check; #endif lease_updatetime = nfs_lease_updatetime; vfsconf[MOUNT_NFS]->vfc_refcount++; /* make us non-unloadable */ #ifdef VFS_LKM sysent[SYS_nfssvc].sy_narg = 2; sysent[SYS_nfssvc].sy_call = nfssvc; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = 2; sysent[SYS_getfh].sy_call = getfh; #endif #endif #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register struct nfsnodehashhead *nhpp; register long t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if (error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makedev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intends to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). * * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introducing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(long, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { /* * If we had a lock and it turns out that the vnode * is an object which we don't want to lock (e.g.
VDIR) * to avoid nasty hanging problems on a server crash, * then release it here. */ if (vtyp != VREG && VOP_ISLOCKED(vp)) VOP_UNLOCK(vp); vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, (dev_t)rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode. */ LIST_REMOVE(np, n_hash); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; nhpp = NFSNOHASH(nfs_hash(np->n_fhp, np->n_fhsize)); LIST_INSERT_HEAD(nhpp, np, n_hash); *vpp = vp = nvp; } } np->n_mtime = mtime.ts_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = (dev_t)rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); fxdr_hyper(&fp->fa3_size, &vap->va_size); vap->va_blocksize = NFS_FABLKSIZE; fxdr_hyper(&fp->fa3_used, &vap->va_bytes); vap->va_fileid = fxdr_unsigned(int, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_long, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(long, fp->fa2_blocksize); vap->va_bytes = fxdr_unsigned(long, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(long, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.ts_nsec = 0; vap->va_gen = fxdr_unsigned(u_long, fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time.tv_sec; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np = VTONFS(vp); register struct vattr *vap; if ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np)) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; vap = &np->n_vattr; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a 
lookup() call and do it */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct mbuf *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp; struct vnode *dp; int error, rdonly; struct componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || *fromcp == '/') { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if (error = nfs_adv(mdp, dposp, len, rem)) goto out; } ndp->ni_pathlen = tocp - cnp->cn_pnbuf; cnp->cn_nameptr = cnp->cn_pnbuf; /* * Extract and set starting directory. */ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag)) goto out; if (dp->v_type != VDIR) { - nfsrv_vrele(dp); + vrele(dp); error = ENOTDIR; goto out; } VREF(dp); *retdirp = dp; ndp->ni_startdir = dp; if (rdonly) cnp->cn_flags |= (NOCROSSMOUNT | RDONLY); else cnp->cn_flags |= NOCROSSMOUNT; /* * And call lookup() to do the real work */ cnp->cn_proc = p; if (error = lookup(ndp)) goto out; /* * Check for encountering a symbolic link */ if (cnp->cn_flags & ISSYMLINK) { if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); vput(ndp->ni_vp); ndp->ni_vp = NULL; error = EINVAL; goto out; } - nfsrv_vmio(ndp->ni_vp); + nfsrv_object_create(ndp->ni_vp); /* * Check for saved name request */ if (cnp->cn_flags & (SAVENAME | SAVESTART)) { cnp->cn_flags |= HASBUF; return (0); } out: FREE(cnp->cn_pnbuf, M_NAMEI); return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
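* nfsm_srvwcc() below emits the V3 wcc_data: an optional pre-op block * of just size, mtime and ctime (seven XDR words when present), * followed by the post-op attributes, which nfsm_srvpostopattr() * writes as a full nfs_fattr. */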
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; if (before_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&(before_vap->va_size), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_long *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(&vap->va_size, &fp->fa3_size); txdr_hyper(&vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct mbuf *nam; int *rdonlyp; int kerbflag; { register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; *vpp = (struct vnode *)0; mp = getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); /* * 
Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; - nfsrv_vmio(*vpp); + nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp); return (0); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct mbuf *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = mtod(nam, struct sockaddr_in *); if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = mtod(nam, struct sockaddr_iso *); isoaddr2 = mtod(haddr->had_nam, struct sockaddr_iso *); if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { 0, 0 }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. */ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = off / NFS_DIRBLKSIZ; if (pos == 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. 
Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int -nfsrv_vmio(struct vnode *vp) { - vm_object_t object; +nfsrv_object_create(struct vnode *vp) { if ((vp == NULL) || (vp->v_type != VREG)) return 1; - -retry: - if ((vp->v_flag & VVMIO) == 0) { - struct vattr vat; - struct proc *p = curproc; - - if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) - panic("nfsrv_vmio: VOP_GETATTR failed"); - - (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - - vp->v_flag |= VVMIO; - } else { - if ((object = vp->v_object) && - (object->flags & OBJ_DEAD)) { - tsleep(object, PVM, "nfdead", 0); - goto retry; - } - if (!object) - panic("nfsrv_vmio: VMIO object missing"); - vm_object_reference(object); - } - return 0; -} -int -nfsrv_vput(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vput(vp); - vm_object_deallocate(vp->v_object); - } else { - vput(vp); - } - return 0; -} -int -nfsrv_vrele(struct vnode *vp) { - if ((vp->v_flag & VVMIO) && vp->v_object) { - vrele(vp); - vm_object_deallocate(vp->v_object); - } else { - vrele(vp); - } - return 0; + return vfs_object_create(vp, curproc, curproc?curproc->p_ucred:NULL, 1); } #endif /* NFS_NOSERVER */ Index: head/sys/nfsserver/nfsrvstats.h =================================================================== --- head/sys/nfsserver/nfsrvstats.h (revision 17760) +++ head/sys/nfsserver/nfsrvstats.h (revision 17761) @@ -1,603 +1,601 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.16 1995/12/17 21:12:05 phk Exp $ + * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ */ #ifndef _NFS_NFS_H_ #define _NFS_NFS_H_ #include /* * Tunable constants for nfs */ #define NFS_MAXIOVEC 34 #define NFS_TICKINTVL 5 /* Desired time for a tick (msec) */ #define NFS_HZ (hz / nfs_ticks) /* Ticks/sec */ #define NFS_TIMEO (1 * NFS_HZ) /* Default timeout = 1 second */ #define NFS_MINTIMEO (1 * NFS_HZ) /* Min timeout to use */ #define NFS_MAXTIMEO (60 * NFS_HZ) /* Max timeout to backoff to */ #define NFS_MINIDEMTIMEO (5 * NFS_HZ) /* Min timeout for non-idempotent ops*/ #define NFS_MAXREXMIT 100 /* Stop counting after this many */ #define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */ #define NFS_RETRANS 10 /* Num of retrans for soft mounts */ #define NFS_MAXGRPS 16 /* Max. size of groups list */ #ifndef NFS_MINATTRTIMO #define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */ #endif #ifndef NFS_MAXATTRTIMO #define NFS_MAXATTRTIMO 60 #endif #define NFS_WSIZE 8192 /* Def. write data size <= 8192 */ #define NFS_RSIZE 8192 /* Def. read data size <= 8192 */ #define NFS_READDIRSIZE 8192 /* Def. readdir size */ #define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */ #define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */ #define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */ #define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */ #define NFS_MAXGATHERDELAY 100 /* Max. write gather delay (msec) */ #ifndef NFS_GATHERDELAY #define NFS_GATHERDELAY 10 /* Default write gather delay (msec) */ #endif #define NFS_DIRBLKSIZ 4096 /* Must be a multiple of DIRBLKSIZ */ /* * Oddballs */ #define NMOD(a) ((a) % nfs_asyncdaemons) #define NFS_CMPFH(n, f, s) \ ((n)->n_fhsize == (s) && !bcmp((caddr_t)(n)->n_fhp, (caddr_t)(f), (s))) #define NFS_ISV3(v) (VFSTONFS((v)->v_mount)->nm_flag & NFSMNT_NFSV3) #define NFS_SRVMAXDATA(n) \ (((n)->nd_flag & ND_NFSV3) ? (((n)->nd_nam2) ? \ NFS_MAXDGRAMDATA : NFS_MAXDATA) : NFS_V2MAXDATA) /* * XXX * sys/buf.h should be edited to change B_APPENDWRITE --> B_NEEDCOMMIT, but * until then... * Same goes for sys/malloc.h, which needs M_NFSDIROFF, * M_NFSRVDESC and M_NFSBIGFH added. * The VA_EXCLUSIVE flag should be added for va_vaflags and set for an * exclusive create. 
* The B_INVAFTERWRITE flag should be set to whatever is required by the * buffer cache code to say "Invalidate the block after it is written back". */ #ifndef B_NEEDCOMMIT #define B_NEEDCOMMIT B_APPENDWRITE #endif #ifndef M_NFSRVDESC #define M_NFSRVDESC M_TEMP #endif #ifndef M_NFSDIROFF #define M_NFSDIROFF M_TEMP #endif #ifndef M_NFSBIGFH #define M_NFSBIGFH M_TEMP #endif #ifndef VA_EXCLUSIVE #define VA_EXCLUSIVE 0 #endif #ifdef __FreeBSD__ #define B_INVAFTERWRITE B_NOCACHE #else #define B_INVAFTERWRITE B_INVAL #endif /* * These ifdefs try to handle the differences between the various 4.4BSD-Lite * based vfs interfaces. * btw: NetBSD-current does have a VOP_LEASE(), but I don't know how to * differentiate between NetBSD-1.0 and NetBSD-current, so.. * I also don't know about BSDi's 2.0 release. */ #if !defined(HAS_VOPLEASE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPLEASE 1 #endif #if !defined(HAS_VOPREVOKE) && !defined(__FreeBSD__) && !defined(__NetBSD__) #define HAS_VOPREVOKE 1 #endif /* * The IO_METASYNC flag should be implemented for local file systems. * (Until then, it is nothing at all.) */ #ifndef IO_METASYNC #define IO_METASYNC 0 #endif /* * Set the attribute timeout based on how recently the file has been modified. */ #define NFS_ATTRTIMEO(np) \ ((((np)->n_flag & NMODIFIED) || \ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \ (time.tv_sec - (np)->n_mtime) / 10)) /* * Expected allocation sizes for major data structures. If the actual size * of the structure exceeds these sizes, then malloc() will be allocating * almost twice the memory required. This is used in nfs_init() to warn * the sysadmin that the size of a structure should be reduced. * (These sizes are always a power of 2. If the kernel malloc() changes * to one that does not allocate space in powers of 2 size, then this all * becomes bunk!) */ #define NFS_NODEALLOC 256 #define NFS_MNTALLOC 512 #define NFS_SVCALLOC 256 #define NFS_UIDALLOC 128 /* * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs * should ever try to use it. */ struct nfsd_args { int sock; /* Socket to serve */ caddr_t name; /* Client address for connection based sockets */ int namelen; /* Length of name */ }; struct nfsd_srvargs { struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */ uid_t nsd_uid; /* Effective uid mapped to cred */ u_long nsd_haddr; /* Ip address of client */ struct ucred nsd_cr; /* Cred. 
uid maps to */ int nsd_authlen; /* Length of auth string (ret) */ u_char *nsd_authstr; /* Auth string (ret) */ int nsd_verflen; /* and the verifier */ u_char *nsd_verfstr; struct timeval nsd_timestamp; /* timestamp from verifier */ u_long nsd_ttl; /* credential ttl (sec) */ NFSKERBKEY_T nsd_key; /* Session key */ }; struct nfsd_cargs { char *ncd_dirp; /* Mount dir path */ uid_t ncd_authuid; /* Effective uid */ int ncd_authtype; /* Type of authenticator */ int ncd_authlen; /* Length of authenticator string */ u_char *ncd_authstr; /* Authenticator string */ int ncd_verflen; /* and the verifier */ u_char *ncd_verfstr; NFSKERBKEY_T ncd_key; /* Session key */ }; /* * Stats structure */ struct nfsstats { int attrcache_hits; int attrcache_misses; int lookupcache_hits; int lookupcache_misses; int direofcache_hits; int direofcache_misses; int biocache_reads; int read_bios; int read_physios; int biocache_writes; int write_bios; int write_physios; int biocache_readlinks; int readlink_bios; int biocache_readdirs; int readdir_bios; int rpccnt[NFS_NPROCS]; int rpcretries; int srvrpccnt[NFS_NPROCS]; int srvrpc_errs; int srv_errs; int rpcrequests; int rpctimeouts; int rpcunexpected; int rpcinvalid; int srvcache_inproghits; int srvcache_idemdonehits; int srvcache_nonidemdonehits; int srvcache_misses; int srvnqnfs_leases; int srvnqnfs_maxleases; int srvnqnfs_getleases; int srvvop_writes; }; /* * Flags for nfssvc() system call. */ #define NFSSVC_BIOD 0x002 #define NFSSVC_NFSD 0x004 #define NFSSVC_ADDSOCK 0x008 #define NFSSVC_AUTHIN 0x010 #define NFSSVC_GOTAUTH 0x040 #define NFSSVC_AUTHINFAIL 0x080 #define NFSSVC_MNTD 0x100 /* * fs.nfs sysctl(3) identifiers */ #define NFS_NFSSTATS 1 /* struct: struct nfsstats */ #define FS_NFS_NAMES { \ { 0, 0 }, \ { "nfsstats", CTLTYPE_STRUCT }, \ } /* * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts. * What should be in this set is open to debate, but I believe that since * I/O system calls on ufs are never interrupted by signals the set should * be minimal. My reasoning is that many current programs that use signals * such as SIGALRM will not expect file I/O system calls to be interrupted * by them and break. */ #if defined(KERNEL) || defined(_KERNEL) struct uio; struct buf; struct vattr; struct nameidata; /* XXX */ #define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \ sigmask(SIGHUP)|sigmask(SIGQUIT)) /* * Socket errors ignored for connectionless sockets?? 
* For now, ignore them all */ #define NFSIGNORE_SOERROR(s, e) \ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \ ((s) & PR_CONNREQUIRED) == 0) /* * Nfs outstanding request list element */ struct nfsreq { TAILQ_ENTRY(nfsreq) r_chain; struct mbuf *r_mreq; struct mbuf *r_mrep; struct mbuf *r_md; caddr_t r_dpos; struct nfsmount *r_nmp; struct vnode *r_vp; u_long r_xid; int r_flags; /* flags on request, see below */ int r_retry; /* max retransmission count */ int r_rexmit; /* current retrans count */ int r_timer; /* tick counter on reply */ int r_procnum; /* NFS procedure number */ int r_rtt; /* RTT for rpc */ struct proc *r_procp; /* Proc that did I/O system call */ }; /* * Queue head for nfsreq's */ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; /* Flag values for r_flags */ #define R_TIMING 0x01 /* timing request (in mntp) */ #define R_SENT 0x02 /* request has been sent */ #define R_SOFTTERM 0x04 /* soft mnt, too many retries */ #define R_INTR 0x08 /* intr mnt, signal pending */ #define R_SOCKERR 0x10 /* Fatal error on socket */ #define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */ #define R_MUSTRESEND 0x40 /* Must resend request */ #define R_GETONEREP 0x80 /* Probe for one reply only */ /* * A list of nfssvc_sock structures is maintained with all the sockets * that require service by the nfsd. * The nfsuid structs hang off of the nfssvc_sock structs in both lru * and uid hash lists. */ #ifndef NFS_UIDHASHSIZ #define NFS_UIDHASHSIZ 29 /* Tune the size of nfssvc_sock with this */ #endif #define NUIDHASH(sock, uid) \ (&(sock)->ns_uidhashtbl[(uid) % NFS_UIDHASHSIZ]) #ifndef NFS_WDELAYHASHSIZ #define NFS_WDELAYHASHSIZ 16 /* and with this */ #endif #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ #define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) #define NFSNOHASH(fhsum) \ (&nfsnodehashtbl[(fhsum) & nfsnodehash]) /* * Network address hash list element */ union nethostaddr { u_long had_inetaddr; struct mbuf *had_nam; }; struct nfsuid { TAILQ_ENTRY(nfsuid) nu_lru; /* LRU chain */ LIST_ENTRY(nfsuid) nu_hash; /* Hash list */ int nu_flag; /* Flags */ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */ struct ucred nu_cr; /* Cred uid mapped to */ int nu_expire; /* Expiry time (sec) */ struct timeval nu_timestamp; /* Kerb. timestamp */ u_long nu_nickname; /* Nickname on server */ NFSKERBKEY_T nu_key; /* and session key */ }; #define nu_inetaddr nu_haddr.had_inetaddr #define nu_nam nu_haddr.had_nam /* Bits for nu_flag */ #define NU_INETADDR 0x1 #define NU_NAM 0x2 #define NU_NETFAM(u) (((u)->nu_flag & NU_INETADDR) ? 
AF_INET : AF_ISO) struct nfssvc_sock { TAILQ_ENTRY(nfssvc_sock) ns_chain; /* List of all nfssvc_sock's */ TAILQ_HEAD(, nfsuid) ns_uidlruhead; struct file *ns_fp; struct socket *ns_so; struct mbuf *ns_nam; struct mbuf *ns_raw; struct mbuf *ns_rawend; struct mbuf *ns_rec; struct mbuf *ns_recend; struct mbuf *ns_frag; int ns_flag; int ns_solock; int ns_cc; int ns_reclen; int ns_numuids; u_long ns_sref; LIST_HEAD(, nfsrv_descript) ns_tq; /* Write gather lists */ LIST_HEAD(, nfsuid) ns_uidhashtbl[NFS_UIDHASHSIZ]; LIST_HEAD(nfsrvw_delayhash, nfsrv_descript) ns_wdelayhashtbl[NFS_WDELAYHASHSIZ]; }; /* Bits for "ns_flag" */ #define SLP_VALID 0x01 #define SLP_DOREC 0x02 #define SLP_NEEDQ 0x04 #define SLP_DISCONN 0x08 #define SLP_GETSTREAM 0x10 #define SLP_LASTFRAG 0x20 #define SLP_ALLFLAGS 0xff extern TAILQ_HEAD(nfssvc_sockhead, nfssvc_sock) nfssvc_sockhead; extern int nfssvc_sockhead_flag; #define SLP_INIT 0x01 #define SLP_WANTINIT 0x02 /* * One of these structures is allocated for each nfsd. */ struct nfsd { TAILQ_ENTRY(nfsd) nfsd_chain; /* List of all nfsd's */ int nfsd_flag; /* NFSD_ flags */ struct nfssvc_sock *nfsd_slp; /* Current socket */ int nfsd_authlen; /* Authenticator len */ u_char nfsd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */ int nfsd_verflen; /* and the Verifier */ u_char nfsd_verfstr[RPCVERF_MAXSIZ]; struct proc *nfsd_procp; /* Proc ptr */ struct nfsrv_descript *nfsd_nd; /* Associated nfsrv_descript */ }; /* Bits for "nfsd_flag" */ #define NFSD_WAITING 0x01 #define NFSD_REQINPROG 0x02 #define NFSD_NEEDAUTH 0x04 #define NFSD_AUTHFAIL 0x08 /* * This structure is used by the server for describing each request. * Some fields are used only when write request gathering is performed. */ struct nfsrv_descript { u_quad_t nd_time; /* Write deadline (usec) */ off_t nd_off; /* Start byte offset */ off_t nd_eoff; /* and end byte offset */ LIST_ENTRY(nfsrv_descript) nd_hash; /* Hash list */ LIST_ENTRY(nfsrv_descript) nd_tq; /* and timer list */ LIST_HEAD(,nfsrv_descript) nd_coalesce; /* coalesced writes */ struct mbuf *nd_mrep; /* Request mbuf list */ struct mbuf *nd_md; /* Current dissect mbuf */ struct mbuf *nd_mreq; /* Reply mbuf list */ struct mbuf *nd_nam; /* and socket addr */ struct mbuf *nd_nam2; /* return socket addr */ caddr_t nd_dpos; /* Current dissect pos */ int nd_procnum; /* RPC # */ int nd_stable; /* storage type */ int nd_flag; /* nd_flag */ int nd_len; /* Length of this write */ int nd_repstat; /* Reply status */ u_long nd_retxid; /* Reply xid */ u_long nd_duration; /* Lease duration */ struct timeval nd_starttime; /* Time RPC initiated */ fhandle_t nd_fh; /* File handle */ struct ucred nd_cr; /* Credentials */ }; /* Bits for "nd_flag" */ #define ND_READ LEASE_READ #define ND_WRITE LEASE_WRITE #define ND_CHECK 0x04 #define ND_LEASE (ND_READ | ND_WRITE | ND_CHECK) #define ND_NFSV3 0x08 #define ND_NQNFS 0x10 #define ND_KERBNICK 0x20 #define ND_KERBFULL 0x40 #define ND_KERBAUTH (ND_KERBNICK | ND_KERBFULL) extern TAILQ_HEAD(nfsd_head, nfsd) nfsd_head; extern int nfsd_head_flag; #define NFSD_CHECKSLP 0x01 /* * These macros compare nfsrv_descript structures. 
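 *
 * (Editor's illustration, not part of this commit: the write-gathering
 * server code coalesces a queued write into an earlier one only when
 * both predicates below hold, roughly
 *
 *	if (NFSW_CONTIG(owp, nd) && NFSW_SAMECRED(owp, nd))
 *		append nd to owp's nd_coalesce list;
 *
 * where "owp" and "nd" are hypothetical names for the old and new
 * descriptors: the old write must end at or past where the new one
 * begins, on the same file handle, under the same credentials.)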
*/ #define NFSW_CONTIG(o, n) \ ((o)->nd_eoff >= (n)->nd_off && \ !bcmp((caddr_t)&(o)->nd_fh, (caddr_t)&(n)->nd_fh, NFSX_V3FH)) #define NFSW_SAMECRED(o, n) \ (((o)->nd_flag & ND_KERBAUTH) == ((n)->nd_flag & ND_KERBAUTH) && \ !bcmp((caddr_t)&(o)->nd_cr, (caddr_t)&(n)->nd_cr, \ sizeof (struct ucred))) int nfs_reply __P((struct nfsreq *)); int nfs_getreq __P((struct nfsrv_descript *,struct nfsd *,int)); int nfs_send __P((struct socket *,struct mbuf *,struct mbuf *,struct nfsreq *)); int nfs_rephead __P((int,struct nfsrv_descript *,struct nfssvc_sock *,int,int,u_quad_t *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_sndlock __P((int *,struct nfsreq *)); void nfs_sndunlock __P((int *flagp)); int nfs_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); int nfs_vinvalbuf __P((struct vnode *,int,struct ucred *,struct proc *,int)); int nfs_readrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_writerpc __P((struct vnode *,struct uio *,struct ucred *,int *,int *)); int nfs_readdirrpc __P((register struct vnode *,struct uio *,struct ucred *)); int nfs_asyncio __P((struct buf *,struct ucred *)); int nfs_doio __P((struct buf *,struct ucred *,struct proc *)); int nfs_readlinkrpc __P((struct vnode *,struct uio *,struct ucred *)); int nfs_sigintr __P((struct nfsmount *,struct nfsreq *r,struct proc *)); int nfs_readdirplusrpc __P((struct vnode *,register struct uio *,struct ucred *)); int nfsm_disct __P((struct mbuf **,caddr_t *,int,int,caddr_t *)); void nfsm_srvfattr __P((struct nfsrv_descript *,struct vattr *,struct nfs_fattr *)); void nfsm_srvwcc __P((struct nfsrv_descript *,int,struct vattr *,int,struct vattr *,struct mbuf **,char **)); void nfsm_srvpostopattr __P((struct nfsrv_descript *,int,struct vattr *,struct mbuf **,char **)); int netaddr_match __P((int,union nethostaddr *,struct mbuf *)); int nfs_request __P((struct vnode *,struct mbuf *,int,struct proc *,struct ucred *,struct mbuf **,struct mbuf **,caddr_t *)); int nfs_loadattrcache __P((struct vnode **,struct mbuf **,caddr_t *,struct vattr *)); int nfs_namei __P((struct nameidata *,fhandle_t *,int,struct nfssvc_sock *,struct mbuf *,struct mbuf **,caddr_t *,struct vnode **,struct proc *,int)); void nfsm_adj __P((struct mbuf *,int,int)); int nfsm_mbuftouio __P((struct mbuf **,struct uio *,int,caddr_t *)); void nfsrv_initcache __P((void)); int nfs_getauth __P((struct nfsmount *,struct nfsreq *,struct ucred *,char **,int *,char *,int *,NFSKERBKEY_T)); int nfs_getnickauth __P((struct nfsmount *,struct ucred *,char **,int *,char *,int)); int nfs_savenickauth __P((struct nfsmount *,struct ucred *,int,NFSKERBKEY_T,struct mbuf **,char **,struct mbuf *)); int nfs_adv __P((struct mbuf **,caddr_t *,int,int)); void nfs_nhinit __P((void)); void nfs_timer __P((void*)); u_long nfs_hash __P((nfsfh_t *,int)); void nfsrv_slpderef __P((struct nfssvc_sock *slp)); int nfsrv_dorec __P((struct nfssvc_sock *,struct nfsd *,struct nfsrv_descript **)); void nfsrv_cleancache __P((void)); int nfsrv_getcache __P((struct nfsrv_descript *,struct nfssvc_sock *,struct mbuf **)); int nfs_init __P((void)); void nfsrv_updatecache __P((struct nfsrv_descript *,int,struct mbuf *)); int nfs_connect __P((struct nfsmount *,struct nfsreq *)); void nfs_disconnect __P((struct nfsmount *nmp)); int nfs_getattrcache __P((struct vnode *,struct vattr *)); int nfsm_strtmbuf __P((struct mbuf **,char **,char *,long)); int nfs_bioread __P((struct vnode *,struct uio *,int,struct ucred *)); int nfsm_uiotombuf __P((struct uio *,struct mbuf **,int,caddr_t *)); void nfsrv_init 
__P((int)); void nfs_clearcommit __P((struct mount *)); int nfsrv_errmap __P((struct nfsrv_descript *, int)); void nfsrv_rcv __P((struct socket *so, caddr_t arg, int waitflag)); void nfsrvw_sort __P((gid_t [],int)); void nfsrv_setcred __P((struct ucred *,struct ucred *)); int nfs_writebp __P((struct buf *,int)); -int nfsrv_vput __P(( struct vnode * )); -int nfsrv_vrele __P(( struct vnode * )); -int nfsrv_vmio __P(( struct vnode * )); +int nfsrv_object_create __P(( struct vnode * )); void nfsrv_wakenfsd __P((struct nfssvc_sock *slp)); int nfsrv_writegather __P((struct nfsrv_descript **, struct nfssvc_sock *, struct proc *, struct mbuf **)); int nfs_fsinfo __P((struct nfsmount *, struct vnode *, struct ucred *, struct proc *p)); int nfsrv3_access __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_commit __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_create __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_fhtovp __P((fhandle_t *,int,struct vnode **, struct ucred *,struct nfssvc_sock *,struct mbuf *, int *,int)); int nfsrv_fsinfo __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_getattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_link __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_lookup __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mkdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_mknod __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_noop __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_null __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_pathconf __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_read __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readdirplus __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_readlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_remove __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rename __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_rmdir __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_setattr __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_statfs __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_symlink __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); int nfsrv_write __P((struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mrq)); 
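/*
 * Editor's note (an illustrative sketch, not part of the original
 * header): with nfsrv_vput() and nfsrv_vrele() removed, a server path
 * that used to pair nfsrv_vmio() with a special release now reads
 * simply
 *
 *	nfsrv_object_create(vp);
 *	...
 *	vput(vp);
 *
 * since vfs_object_create() ties the VM object's lifetime to the vnode
 * itself and the caller no longer deallocates the object by hand.
 */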
#endif /* KERNEL */ #endif Index: head/sys/sys/vnode.h =================================================================== --- head/sys/sys/vnode.h (revision 17760) +++ head/sys/sys/vnode.h (revision 17761) @@ -1,435 +1,435 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.32 1996/03/29 06:39:39 davidg Exp $ + * $Id: vnode.h,v 1.33 1996/05/31 00:20:32 peter Exp $ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; /* * Vnode tag types. * These are for the benefit of external programs only (e.g., pstat) * and should NEVER be inspected by the kernel. */ enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, VT_UNION, VT_MSDOSFS, VT_DEVFS }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). 
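 *
 * (Editor's illustration, not in the original text: a filesystem
 * recovers its private node from v_data with a cast, conventionally
 * wrapped in a macro such as
 *
 *	#define	VTOFOO(vp)	((struct foonode *)(vp)->v_data)
 *
 * where "foonode" is a hypothetical per-fs structure; the VTOI() and
 * VTONFS() macros used elsewhere in this change follow this pattern.)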
*/ LIST_HEAD(buflists, buf); typedef int vop_t __P((void *)); +struct vm_object; struct vnode { u_long v_flag; /* vnode flags (see below) */ int v_usecount; /* reference count of users */ int v_writecount; /* reference count of writers */ int v_holdcnt; /* page & buffer references */ daddr_t v_lastr; /* last read (read-ahead) */ u_long v_id; /* capability identifier */ struct mount *v_mount; /* ptr to vfs we are in */ vop_t **v_op; /* vnode operations vector */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ long v_numoutput; /* num of writes in progress */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ struct socket *vu_socket; /* unix ipc (VSOCK) */ struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; struct nqlease *v_lease; /* Soft reference to lease */ daddr_t v_lastw; /* last write (write cluster) */ daddr_t v_cstart; /* start block of cluster */ daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ int v_ralen; /* Read-ahead length */ int v_usage; /* Vnode usage counter */ daddr_t v_maxra; /* last readahead block */ - void *v_object; /* Place to store VM object */ + struct vm_object *v_object; /* Place to store VM object */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_specinfo #define v_fifoinfo v_un.vu_fifoinfo /* * Vnode flags. */ #define VROOT 0x0001 /* root of its file system */ #define VTEXT 0x0002 /* vnode is a pure text prototype */ #define VSYSTEM 0x0004 /* vnode being used by kernel */ #define VOLOCK 0x0008 /* vnode is locked waiting for an object */ #define VOWANT 0x0010 /* a process is waiting for VOLOCK */ #define VXLOCK 0x0100 /* vnode is locked to change underlying type */ #define VXWANT 0x0200 /* process is waiting for vnode */ #define VBWAIT 0x0400 /* waiting for output to complete */ #define VALIASED 0x0800 /* vnode has an alias */ #define VDIROP 0x1000 /* LFS: vnode is involved in a directory op */ #define VVMIO 0x2000 /* VMIO flag */ #define VNINACT 0x4000 /* LFS: skip ufs_inactive() in lfs_vunref */ #define VAGE 0x8000 /* Insert vnode at head of free list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). 
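 *
 * (Editor's illustration, not in the original text: a setattr caller
 * first initializes every field to VNOVAL and then sets only what it
 * means to change, e.g.
 *
 *	struct vattr va;
 *
 *	VATTR_NULL(&va);
 *	va.va_size = 0;		(truncate; leave all else untouched)
 *
 * so the filesystem can distinguish requested changes from defaults.)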
*/ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* file's access mode and type */ short va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ long va_fsid; /* file system id (dev for now) */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ /* * Flags for ioflag. */ #define IO_UNIT 0x01 /* do I/O as atomic unit */ #define IO_APPEND 0x02 /* append write to end */ #define IO_SYNC 0x04 /* do I/O synchronously */ #define IO_NODELOCKED 0x08 /* underlying node already locked */ #define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ #define IO_VMIO 0x20 /* data already in VMIO space */ /* * Modes. Some values same as Ixxx entries from inode.h for now. */ #define VSUID 04000 /* set user id on execution */ #define VSGID 02000 /* set group id on execution */ #define VSVTX 01000 /* save swapped text even after use */ #define VREAD 00400 /* read, write, execute permissions */ #define VWRITE 00200 #define VEXEC 00100 /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) #ifdef KERNEL /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define DOCLOSE 0x0008 /* vclean: close active files */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ #ifdef DIAGNOSTIC #define HOLDRELE(vp) holdrele(vp) #define VATTR_NULL(vap) vattr_null(vap) #define VHOLD(vp) vhold(vp) #define VREF(vp) vref(vp) void holdrele __P((struct vnode *)); void vhold __P((struct vnode *)); #else #define HOLDRELE(vp) (vp)->v_holdcnt-- /* decrease buf or page ref */ #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #define VHOLD(vp) (vp)->v_holdcnt++ /* increase buf or page ref */ -#define VREF(vp) (vp)->v_usecount++ /* increase reference */ +#define VREF(vp) vref(vp) /* increase reference */ #endif #define NULLVP ((struct vnode *)NULL) #ifdef VFS_LKM #define VNODEOP_SET(f) DATA_SET(MODVNOPS,f) #else #define VNODEOP_SET(f) DATA_SET(vfs_opv_descs_,f) #endif /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. 
"/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ extern int extravnodes; /* extra vnodes to allocate at boot */ extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ /* * Macro/function to check for client cache inconsistency w.r.t. leasing. */ #define LEASE_READ 0x1 /* Check lease for readers */ #define LEASE_WRITE 0x2 /* Check lease for modifiers */ extern void (*lease_check) __P((struct vnode *vp, struct proc *p, struct ucred *ucred, int flag)); extern void (*lease_updatetime) __P((int deltat)); #ifdef NFS #ifdef NQNFS #define LEASE_CHECK(vp, p, cred, flag) lease_check((vp), (p), (cred), (flag)) #define LEASE_UPDATETIME(dt) lease_updatetime(dt) #else #define LEASE_CHECK(vp, p, cred, flag) #define LEASE_UPDATETIME(dt) #endif /* NQNFS */ #else #define LEASE_CHECK(vp, p, cred, flag) \ do { if(lease_check) lease_check((vp), (p), (cred), (flag)); } while(0) #define LEASE_UPDATETIME(dt) \ do { if(lease_updatetime) lease_updatetime(dt); } while(0) #endif /* NFS */ #endif /* KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { int vdesc_offset; /* offset in vector--first for speed */ char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_proc_offset; /* proc location, if any */ int vdesc_componentname_offset; /* if any */ /* * Finally, we've got a list of private data (about each operation) * for each transport layer. (Support to manage this list is not * yet part of BSD.) */ caddr_t *vdesc_transports; }; #ifdef KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; /* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ignored all the fancy stuff for * Crays, so if you decide to port this to such a serious machine, * you might want to consult Intrinsic.h's XtOffset{,Of,To}. */ #define VOPARG_OFFSET(p_type,field) \ ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) #define VOPARG_OFFSETOF(s_type,field) \ VOPARG_OFFSET(s_type*,field) #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) /* * This structure is used to configure the new vnodeops vector. 
*/ struct vnodeopv_entry_desc { struct vnodeop_desc *opve_op; /* which operation this is */ vop_t *opve_impl; /* code implementing this operation */ }; struct vnodeopv_desc { /* ptr to the ptr to the vector where op should go */ vop_t ***opv_desc_vector_p; struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; /* * A default routine which just returns an error. */ int vn_default_error __P((void)); /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; /* * VOCALL calls an op given an ops vector. We break it out because BSD's * vclean changes the ops vector and then wants to call ops with the old * vector. */ #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) /* * This call works for vnodes in the kernel. */ #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) #define VDESC(OP) (& __CONCAT(OP,_desc)) #define VOFFSET(OP) (VDESC(OP)->vdesc_offset) /* * Finally, include the default set of vnode operations. */ #include /* * Public vnode manipulation functions. */ struct componentname; struct file; struct mount; struct nameidata; struct proc; struct stat; struct ucred; struct uio; struct vattr; struct vnode; struct vop_bwrite_args; int bdevvp __P((dev_t dev, struct vnode **vpp)); /* cache_* may belong in namei.h. */ void cache_enter __P((struct vnode *dvp, struct vnode *vp, struct componentname *cnp)); int cache_lookup __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)); void cache_purge __P((struct vnode *vp)); void cache_purgevfs __P((struct mount *mp)); struct vnode * checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); int getnewvnode __P((enum vtagtype tag, struct mount *mp, vop_t **vops, struct vnode **vpp)); void insmntque __P((struct vnode *vp, struct mount *mp)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp)); void vfs_opv_init __P((struct vnodeopv_desc **them)); int vget __P((struct vnode *vp, int lockflag)); void vgone __P((struct vnode *vp)); void vgoneall __P((struct vnode *vp)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); int vn_bwrite __P((struct vop_bwrite_args *ap)); int vn_close __P((struct vnode *vp, int flags, struct ucred *cred, struct proc *p)); int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); -void vn_vmio_close __P((struct vnode *vp)); -int vn_vmio_open __P((struct vnode *vp, struct proc *p, - struct ucred *cred)); +int vfs_object_create __P((struct vnode *vp, struct proc *p, + struct ucred *cred, int waslocked)); int vn_writechk __P((struct vnode *vp)); void vprint __P((char *label, struct vnode *vp)); void vput __P((struct vnode *vp)); void vref __P((struct vnode *vp)); void vrele __P((struct vnode *vp)); #endif /* KERNEL */ #endif /* !_SYS_VNODE_H_ */ Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 17760) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 17761) @@ -1,1060 +1,1072 @@ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of 
California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 - * $Id: ffs_vfsops.c,v 1.38 1996/03/02 22:18:34 dyson Exp $ + * $Id: ffs_vfsops.c,v 1.39 1996/06/12 03:37:51 davidg Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include static int ffs_sbupdate __P((struct ufsmount *, int)); static int ffs_reload __P((struct mount *,struct ucred *,struct proc *)); static int ffs_oldfscompat __P((struct fs *)); static int ffs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); struct vfsops ufs_vfsops = { ffs_mount, ufs_start, ffs_unmount, ufs_root, ufs_quotactl, ffs_statfs, ffs_sync, ffs_vget, ffs_fhtovp, ffs_vptofh, ffs_init, }; VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0); extern u_long nextgennumber; /* * ffs_mount * * Called when mounting local physical media * * PARAMETERS: * mountroot * mp mount point structure * path NULL (flag for root mount!!!) * data * ndp * p process (user credentials check [statfs]) * * mount * mp mount point structure * path path to mount point * data pointer to argument struct in user space * ndp mount point namei() return (used for * credentials on reload), reused to look * up block device. * p process (user credentials check) * * RETURNS: 0 Success * !0 error number (errno.h) * * LOCK STATE: * * ENTRY * mount point is locked * EXIT * mount point is locked * * NOTES: * A NULL path can be used for a flag since the mount * system call will fail with EFAULT in copyinstr in * namei() if it is a genuine NULL from the user. 
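 *
 * (Editor's note, an illustration only: the root-mount arm below
 * therefore reduces to roughly
 *
 *	if (path == NULL) {
 *		bdevvp(rootdev, &rootvp);
 *		mp->mnt_flag |= MNT_RDONLY;
 *		err = ffs_mountfs(rootvp, mp, p);
 *	}
 *
 * mirroring the code that follows.)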
*/ static int ffs_mount( mp, path, data, ndp, p) register struct mount *mp; /* mount struct pointer*/ char *path; /* path to mount point*/ caddr_t data; /* arguments to FS specific mount*/ struct nameidata *ndp; /* mount point credentials*/ struct proc *p; /* process requesting mount*/ { u_int size; int err = 0; struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; register struct fs *fs; int flags; /* * Use NULL path to flag a root mount */ if( path == NULL) { /* *** * Mounting root file system *** */ /* Get vnode for root device*/ if( bdevvp( rootdev, &rootvp)) panic("ffs_mountroot: can't setup bdevvp for root"); /* * FS specific handling */ mp->mnt_flag |= MNT_RDONLY; /* XXX globally applicable?*/ /* * Attempt mount */ if( ( err = ffs_mountfs(rootvp, mp, p)) != 0) { /* fs specific cleanup (if any)*/ goto error_1; } goto dostatfs; /* success*/ } /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; err = 0; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp)) { err = EBUSY; goto error_1; } err = ffs_flushfiles(mp, flags, p); vfs_unbusy(mp); } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (err) { goto error_1; } if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) { if (!fs->fs_clean) { if (mp->mnt_flag & MNT_FORCE) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n", fs->fs_fsmnt); err = EPERM; goto error_1; } } fs->fs_ronly = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = 0; ffs_sbupdate(ump, MNT_WAIT); } /* if not updating name...*/ if (args.fspec == 0) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ err = vfs_export(mp, &ump->um_export, &args.export); goto success; } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { err = ENXIO; goto error_2; } if (mp->mnt_flag & MNT_UPDATE) { /* ******************** * UPDATE ******************** */ if (devvp != ump->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); } } else { /* ******************** * NEW MOUNT ******************** */ /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. 
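 *
 * (Editor's note, illustration only: copyinstr() returns the copied
 * length in "size", so the bzero() that follows zero-pads the
 * remainder of the fixed MNAMELEN buffer, i.e.
 *
 *	copyinstr(path, mp->mnt_stat.f_mntonname,
 *	    MNAMELEN - 1, &size);
 *	bzero(mp->mnt_stat.f_mntonname + size,
 *	    MNAMELEN - size);
 *
 * exactly as done below.)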
*/ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = ffs_mountfs(devvp, mp, p); } if (err) { goto error_2; } dostatfs: /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return( err); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ffs_reload(mp, cred, p) register struct mount *mp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct csum *space; struct buf *bp; struct fs *fs; int i, blks, size, error; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty1"); /* * Step 2: re-read superblock from disk. */ error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp); if (error) return (error); fs = (struct fs *)bp->b_data; if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mp)->um_fs; bcopy(&fs->fs_csp[0], &((struct fs *)bp->b_data)->fs_csp[0], sizeof(fs->fs_csp)); bcopy(bp->b_data, fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); ffs_oldfscompat(fs); /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); brelse(bp); } loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vp->v_usecount == 0) { vgone(vp); continue; } /* * Step 5: invalidate all cached file data. */ if (vget(vp, 1)) goto loop; if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. 
*/ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { vput(vp); return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); brelse(bp); vput(vp); if (vp->v_mount != mp) goto loop; } return (0); } /* * Common code for mount and mountroot */ int ffs_mountfs(devvp, mp, p) register struct vnode *devvp; struct mount *mp; struct proc *p; { register struct ufsmount *ump; struct buf *bp; register struct fs *fs; dev_t dev = devvp->v_rdev; struct partinfo dpart; caddr_t base, space; int havepart = 0, blks; int error, i, size; int ronly; u_int strsize; + int ncount; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); - if (vcount(devvp) > 1 && devvp != rootvp) + ncount = vcount(devvp); + if (devvp->v_object) + ncount -= 1; + if (ncount > 1 && devvp != rootvp) return (EBUSY); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else { havepart = 1; size = dpart.disklab->d_secsize; } bp = NULL; ump = NULL; error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp); if (error) goto out; fs = (struct fs *)bp->b_data; if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) { error = EINVAL; /* XXX needs translation */ goto out; } if (!fs->fs_clean) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck.\n",fs->fs_fsmnt); error = EPERM; goto out; } } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; if (ronly == 0) { fs->fs_fmod = 1; fs->fs_clean = 0; } blks = howmany(fs->fs_cssize, fs->fs_fsize); base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT, M_WAITOK); for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) { free(base, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; space += size; brelse(bp); bp = NULL; } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = MOUNT_UFS; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; devvp->v_specflags |= SI_MOUNTEDON; ffs_oldfscompat(fs); /* * Set FS local "last mounted on" information (NULL pad) */ copystr( mp->mnt_stat.f_mntonname, /* mount point*/ fs->fs_fsmnt, /* copy area*/ sizeof(fs->fs_fsmnt) - 1, /* max size*/ &strsize); /* real size*/ bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); if( mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } if (ronly == 0) ffs_sbupdate(ump, MNT_WAIT); /* * Only VMIO the backing device if the backing device is a real * block device. This excludes the original MFS implementation. + * Note that it is optional that the backing device be VMIOed. This + * increases the opportunity for metadata caching. */ if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) { - vn_vmio_open(devvp, p, p->p_ucred); + vfs_object_create(devvp, p, p->p_ucred, 0); } return (0); out: if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); if (ump) { free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * Sanity checks for old file systems. * * XXX - goes away some day. 
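 *
 * (Editor's note, illustration only: the disabled block inside would
 * compute the exact limit as
 *
 *	maxfilesize = fs_bsize * NDADDR - 1
 *	    + fs_bsize * (NINDIR(fs) + NINDIR(fs)^2 + NINDIR(fs)^3)
 *
 * summing the NIADDR indirect levels, but a flat 2^39 bytes is
 * substituted instead.)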
*/ static int ffs_oldfscompat(fs) struct fs *fs; { fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ fs->fs_nrpos = 8; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ #if 0 int i; /* XXX */ quad_t sizepb = fs->fs_bsize; /* XXX */ fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ for (i = 0; i < NIADDR; i++) { /* XXX */ sizepb *= NINDIR(fs); /* XXX */ fs->fs_maxfilesize += sizepb; /* XXX */ } /* XXX */ #endif fs->fs_maxfilesize = (u_quad_t) 1LL << 39; fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ } /* XXX */ return (0); } /* * unmount system call */ int ffs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; int error, flags, ronly; flags = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } error = ffs_flushfiles(mp, flags, p); if (error) return (error); ump = VFSTOUFS(mp); fs = ump->um_fs; ronly = fs->fs_ronly; if (!ronly) { fs->fs_clean = 1; ffs_sbupdate(ump, MNT_WAIT); } ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + + vnode_pager_uncache(ump->um_devvp); + error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); - vn_vmio_close(ump->um_devvp); + + vrele(ump->um_devvp); + free(fs->fs_csp[0], M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; if (!doforce) flags &= ~FORCECLOSE; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, NULLVP, SKIPSYSTEM|flags); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif error = vflush(mp, NULLVP, flags); return (error); } /* * Get file system statistics. */ int ffs_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_MAGIC) panic("ffs_statfs"); sbp->f_type = MOUNT_UFS; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree; sbp->f_bavail = freespace(fs, fs->fs_minfree); sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree; if (sbp != &mp->mnt_stat) { bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ int ffs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp; register struct inode *ip; register struct ufsmount *ump = VFSTOUFS(mp); register struct fs *fs; struct timeval tv; int error, allerror = 0; fs = ump->um_fs; /* * Write back modified superblock. 
* Consistency check that the superblock * is still in the buffer cache. */ if (fs->fs_fmod != 0) { if (fs->fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("update: rofs mod"); } fs->fs_fmod = 0; fs->fs_time = time.tv_sec; allerror = ffs_sbupdate(ump, waitfor); } /* * Write back each (modified) inode. */ loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; if (VOP_ISLOCKED(vp)) continue; ip = VTOI(vp); if ((((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)) && vp->v_dirtyblkhd.lh_first == NULL) continue; if (vp->v_type != VCHR) { if (vget(vp, 1)) goto loop; error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; vput(vp); } else { tv = time; /* VOP_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */ VOP_UPDATE(vp, &tv, &tv, 0); } } /* * Force stale file system control information to be flushed. */ error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p); if (error) allerror = error; #ifdef QUOTA qsync(mp); #endif return (allerror); } /* * Look up a FFS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ffs_inode_hash_lock; int ffs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { register struct fs *fs; register struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int type, error; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ffs_inode_hash_lock) { while (ffs_inode_hash_lock) { ffs_inode_hash_lock = -1; tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); } goto restart; } ffs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ffs_sync() if a sync happens to fire right then, * which will cause a panic because ffs_sync() blindly * dereferences vp->v_data (as well it should). */ type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */ MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); /* Allocate a new vnode/inode. */ error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); if (error) { if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, type); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. 
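Earlier in ffs_vget(), the hand-rolled ffs_inode_hash_lock serializes hash-table insertion across the blocking getnewvnode()/MALLOC() calls, using 0 for free, 1 for held, and -1 for held-with-waiters so that wakeup() is only issued when somebody actually slept. A stand-alone model of that protocol; sleep_on() and wakeup_all() are invented stand-ins for the kernel's tsleep()/wakeup() and never block in this single-threaded demo:

#include <stdio.h>

static int hash_lock;		/* 0 free, 1 held, -1 held + waiters */

static void sleep_on(void *chan)   { printf("sleep on %p\n", chan); }
static void wakeup_all(void *chan) { printf("wakeup %p\n", chan); }

static void
hash_lock_acquire(void)
{
restart:
	if (hash_lock) {
		while (hash_lock) {
			hash_lock = -1;		/* record a waiter */
			sleep_on(&hash_lock);	/* would block here */
		}
		goto restart;			/* re-check after wakeup */
	}
	hash_lock = 1;
}

static void
hash_lock_release(void)
{
	if (hash_lock < 0)
		wakeup_all(&hash_lock);		/* only if someone waited */
	hash_lock = 0;
}

int
main(void)
{
	hash_lock_acquire();	/* uncontended path only, in this demo */
	hash_lock_release();
	return (0);
}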
*/ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { if (++nextgennumber < (u_long)time.tv_sec) nextgennumber = time.tv_sec; ip->i_gen = nextgennumber; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_uid = ip->i_din.di_ouid; /* XXX */ ip->i_gid = ip->i_din.di_ogid; /* XXX */ } /* XXX */ *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ int ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { register struct ufid *ufhp; struct fs *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int ffs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Write a superblock and associated information back to disk. */ static int ffs_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct fs *fs = mp->um_fs; register struct buf *bp; int blks; caddr_t space; int i, size, error = 0; bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); /* Restore compatibility to old file systems. 
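A stale NFS file handle is rejected below purely by range-checking the inode number against the filesystem geometry. A minimal stand-alone sketch of that check (field names abbreviated, geometry values invented):

#include <stdio.h>

#define ROOTINO	2	/* first valid inode, as in FFS */
#define ESTALE	70

struct geom { long ncg, ipg; };	/* trimmed-down fs geometry */

static int
check_handle(const struct geom *g, long ino)
{
	/* valid inodes are ROOTINO .. ncg*ipg - 1, the ffs_fhtovp() test */
	if (ino < ROOTINO || ino >= g->ncg * g->ipg)
		return (ESTALE);
	return (0);
}

int
main(void)
{
	struct geom g = { 16, 1920 };	/* invented: 16 CGs, 1920 inodes each */

	printf("ino 1: %d\n", check_handle(&g, 1));	/* stale */
	printf("ino 2: %d\n", check_handle(&g, 2));	/* ok */
	printf("ino 40000: %d\n", check_handle(&g, 40000)); /* stale */
	return (0);
}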
XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ ((struct fs *)bp->b_data)->fs_nrpos = -1; /* XXX */ if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (caddr_t)fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0); bcopy(space, bp->b_data, (u_int)size); space += size; if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); } return (error); } Index: head/sys/vm/vm_object.c =================================================================== --- head/sys/vm/vm_object.c (revision 17760) +++ head/sys/vm/vm_object.c (revision 17761) @@ -1,1481 +1,1487 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.c 8.5 (Berkeley) 3/22/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.77 1996/07/27 03:24:03 dyson Exp $ + * $Id: vm_object.c,v 1.78 1996/07/30 03:08:14 dyson Exp $ */ /* * Virtual memory object module. */ #include "opt_ddb.h" #include #include #include #include /* for curproc, pageproc */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB static void DDB_vm_object_check __P((void)); #endif static void _vm_object_allocate __P((objtype_t, vm_size_t, vm_object_t)); #ifdef DDB static int _vm_object_in_map __P((vm_map_t map, vm_object_t object, vm_map_entry_t entry)); static int vm_object_in_map __P((vm_object_t object)); #endif static void vm_object_qcollapse __P((vm_object_t object)); #ifdef not_used static void vm_object_deactivate_pages __P((vm_object_t)); #endif static void vm_object_terminate __P((vm_object_t)); static void vm_object_cache_trim __P((void)); /* * Virtual memory objects maintain the actual data * associated with allocated virtual memory. A given * page of memory exists within exactly one object. * * An object is only deallocated when all "references" * are given up. Only one "reference" to a given * region of an object should be writeable. * * Associated with each object is a list of all resident * memory pages belonging to that object; this list is * maintained by the "vm_page" module, and locked by the object's * lock. * * Each object also records a "pager" routine which is * used to retrieve (and store) pages to the proper backing * storage. In addition, objects may be backed by other * objects from which they were virtual-copied. * * The only items within the object structure which are * modified after time of creation are: * reference count locked by object's lock * pager routine locked by object's lock * */ int vm_object_cache_max; struct object_q vm_object_cached_list; static int vm_object_cached; struct object_q vm_object_list; static long vm_object_count; vm_object_t kernel_object; vm_object_t kmem_object; static struct vm_object kernel_object_store; static struct vm_object kmem_object_store; extern int vm_pageout_page_count; static long object_collapses; static long object_bypasses; static void _vm_object_allocate(type, size, object) objtype_t type; vm_size_t size; register vm_object_t object; { TAILQ_INIT(&object->memq); TAILQ_INIT(&object->shadow_head); object->type = type; object->size = size; object->ref_count = 1; object->flags = 0; object->behavior = OBJ_NORMAL; object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; object->handle = NULL; object->paging_offset = (vm_ooffset_t) 0; object->backing_object = NULL; object->backing_object_offset = (vm_ooffset_t) 0; object->last_read = 0; TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); vm_object_count++; } /* * vm_object_init: * * Initialize the VM objects module. 
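The overview comment above states the lifetime rule: an object lives while it has references and is reclaimed when the last one goes. A toy user-space model of just that rule, using the same <sys/queue.h> macros; every name here is invented for illustration:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_object {
	int ref_count;
	TAILQ_ENTRY(toy_object) list;
};
static TAILQ_HEAD(, toy_object) all = TAILQ_HEAD_INITIALIZER(all);

static struct toy_object *
toy_allocate(void)
{
	struct toy_object *o = malloc(sizeof(*o));

	o->ref_count = 1;			/* creator's reference */
	TAILQ_INSERT_TAIL(&all, o, list);
	return (o);
}

static void
toy_deallocate(struct toy_object *o)
{
	if (--o->ref_count != 0)		/* still referenced: done */
		return;
	TAILQ_REMOVE(&all, o, list);		/* last ref: reclaim */
	free(o);
}

int
main(void)
{
	struct toy_object *o = toy_allocate();

	o->ref_count++;		/* like vm_object_reference() */
	toy_deallocate(o);	/* drops to 1, object survives */
	toy_deallocate(o);	/* drops to 0, object freed */
	return (0);
}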
*/ void vm_object_init() { TAILQ_INIT(&vm_object_cached_list); TAILQ_INIT(&vm_object_list); vm_object_count = 0; vm_object_cache_max = 84; if (cnt.v_page_count > 1000) vm_object_cache_max += (cnt.v_page_count - 1000) / 4; kernel_object = &kernel_object_store; _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kernel_object); kmem_object = &kmem_object_store; _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); } /* * vm_object_allocate: * * Returns a new object with the given size. */ vm_object_t vm_object_allocate(type, size) objtype_t type; vm_size_t size; { register vm_object_t result; result = (vm_object_t) malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK); _vm_object_allocate(type, size, result); return (result); } /* * vm_object_reference: * * Gets another reference to the given object. */ -inline void +void vm_object_reference(object) register vm_object_t object; { if (object == NULL) return; if (object->ref_count == 0) { if ((object->flags & OBJ_CANPERSIST) == 0) panic("vm_object_reference: non-persistent object with 0 ref_count"); TAILQ_REMOVE(&vm_object_cached_list, object, cached_list); vm_object_cached--; } object->ref_count++; } /* * vm_object_deallocate: * * Release a reference to the specified object, * gained either through a vm_object_allocate * or a vm_object_reference call. When all references * are gone, storage associated with this object * may be relinquished. * * No object may be locked. */ void vm_object_deallocate(object) vm_object_t object; { vm_object_t temp; while (object != NULL) { if (object->ref_count == 0) panic("vm_object_deallocate: object deallocated too many times"); /* * Lose the reference */ object->ref_count--; if (object->ref_count != 0) { if ((object->ref_count == 1) && (object->handle == NULL) && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; robject = TAILQ_FIRST(&object->shadow_head); if ((robject != NULL) && (robject->handle == NULL) && (robject->type == OBJT_DEFAULT || robject->type == OBJT_SWAP)) { int s; robject->ref_count += 2; object->ref_count += 2; do { s = splvm(); while (robject->paging_in_progress) { robject->flags |= OBJ_PIPWNT; tsleep(robject, PVM, "objde1", 0); } while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "objde2", 0); } splx(s); } while( object->paging_in_progress || robject->paging_in_progress); object->ref_count -= 2; robject->ref_count -= 2; if( robject->ref_count == 0) { robject->ref_count += 1; object = robject; continue; } vm_object_collapse(robject); return; } } /* * If there are still references, then we are done. */ return; } if (object->type == OBJT_VNODE) { struct vnode *vp = object->handle; vp->v_flag &= ~VTEXT; } /* * See if this object can persist and has some resident * pages. If so, enter it in the cache. */ if (object->flags & OBJ_CANPERSIST) { if (object->resident_page_count != 0) { +#if 0 vm_object_page_clean(object, 0, 0 ,TRUE, TRUE); +#endif TAILQ_INSERT_TAIL(&vm_object_cached_list, object, cached_list); vm_object_cached++; vm_object_cache_trim(); return; } else { object->flags &= ~OBJ_CANPERSIST; } } /* * Make sure no one uses us. 
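vm_object_init() above sizes the persist cache as 84 objects plus one per four pages beyond the first thousand. A quick stand-alone check of that formula (the page counts are invented):

#include <stdio.h>

static int
cache_max(int v_page_count)
{
	int max = 84;				/* base, as above */

	if (v_page_count > 1000)
		max += (v_page_count - 1000) / 4;
	return (max);
}

int
main(void)
{
	/* e.g. a 16 MB machine with 4 KB pages has about 4096 pages */
	printf("1000 pages -> %d\n", cache_max(1000));	/* 84 */
	printf("4096 pages -> %d\n", cache_max(4096));	/* 858 */
	return (0);
}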
*/ object->flags |= OBJ_DEAD; temp = object->backing_object; if (temp) { TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); --temp->shadow_count; } vm_object_terminate(object); /* unlocks and deallocates object */ object = temp; } } /* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * * The object must be locked. */ static void vm_object_terminate(object) register vm_object_t object; { register vm_page_t p; int s; /* * wait for the pageout daemon to be done with the object */ s = splvm(); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "objtrm", 0); } splx(s); if (object->paging_in_progress != 0) panic("vm_object_deallocate: pageout in progress"); /* * Clean and free the pages, as appropriate. All references to the * object are gone, so we don't need to lock it. */ if (object->type == OBJT_VNODE) { struct vnode *vp = object->handle; + int waslocked; - VOP_LOCK(vp); + waslocked = VOP_ISLOCKED(vp); + if (!waslocked) + VOP_LOCK(vp); vm_object_page_clean(object, 0, 0, TRUE, FALSE); vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); - VOP_UNLOCK(vp); + if (!waslocked) + VOP_UNLOCK(vp); } /* * Now free the pages. For internal objects, this also removes them * from paging queues. */ while ((p = TAILQ_FIRST(&object->memq)) != NULL) { if (p->flags & PG_BUSY) printf("vm_object_terminate: freeing busy page\n"); PAGE_WAKEUP(p); vm_page_free(p); cnt.v_pfree++; } /* * Let the pager know object is dead. */ vm_pager_deallocate(object); TAILQ_REMOVE(&vm_object_list, object, object_list); vm_object_count--; wakeup(object); /* * Free the space for the object. */ free((caddr_t) object, M_VMOBJ); } /* * vm_object_page_clean * * Clean all dirty pages in the specified range of object. * Leaves page on whatever queue it is currently on. * * Odd semantics: if start == end, we clean everything. * * The object must be locked. 
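vm_object_page_clean() (next) treats end == 0 as "through the end of the object", and only a full-range clean may clear the dirty-tracking flags (OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY). A tiny sketch of that range normalization, mirroring the tstart/tend setup (sizes invented):

#include <stdio.h>

/* Returns 1 when the whole object is covered. */
static int
clean_range(unsigned objsize, unsigned start, unsigned end,
    unsigned *tstart, unsigned *tend)
{
	*tstart = start;
	*tend = (end == 0) ? objsize : end;	/* 0 means "to the end" */
	return (*tstart == 0 && *tend == objsize);
}

int
main(void)
{
	unsigned ts, te;

	printf("full: %d\n", clean_range(100, 0, 0, &ts, &te));	/* 1 */
	printf("part: %d\n", clean_range(100, 10, 20, &ts, &te)); /* 0 */
	return (0);
}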
*/ void vm_object_page_clean(object, start, end, syncio, lockflag) vm_object_t object; vm_pindex_t start; vm_pindex_t end; boolean_t syncio; boolean_t lockflag; { register vm_page_t p, np, tp; register vm_offset_t tstart, tend; vm_pindex_t pi; int s; struct vnode *vp; int runlen; int maxf; int chkb; int maxb; int i; vm_page_t maf[vm_pageout_page_count]; vm_page_t mab[vm_pageout_page_count]; vm_page_t ma[vm_pageout_page_count]; if (object->type != OBJT_VNODE || (object->flags & OBJ_MIGHTBEDIRTY) == 0) return; vp = object->handle; if (lockflag) VOP_LOCK(vp); object->flags |= OBJ_CLEANING; tstart = start; if (end == 0) { tend = object->size; } else { tend = end; } if ((tstart == 0) && (tend == object->size)) { object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); } for(p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) p->flags |= PG_CLEANCHK; rescan: for(p = TAILQ_FIRST(&object->memq); p; p = np) { np = TAILQ_NEXT(p, listq); pi = p->pindex; if (((p->flags & PG_CLEANCHK) == 0) || (pi < tstart) || (pi >= tend) || (p->valid == 0) || (p->queue == PQ_CACHE)) { p->flags &= ~PG_CLEANCHK; continue; } vm_page_test_dirty(p); if ((p->dirty & p->valid) == 0) { p->flags &= ~PG_CLEANCHK; continue; } s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED|PG_REFERENCED; tsleep(p, PVM, "vpcwai", 0); splx(s); goto rescan; } splx(s); s = splvm(); maxf = 0; for(i = 1; i < vm_pageout_page_count; i++) { if (tp = vm_page_lookup(object, pi + i)) { if ((tp->flags & PG_BUSY) || (tp->flags & PG_CLEANCHK) == 0) break; if (tp->queue == PQ_CACHE) { tp->flags &= ~PG_CLEANCHK; break; } vm_page_test_dirty(tp); if ((tp->dirty & tp->valid) == 0) { tp->flags &= ~PG_CLEANCHK; break; } maf[ i - 1 ] = tp; maxf++; continue; } break; } maxb = 0; chkb = vm_pageout_page_count - maxf; if (chkb) { for(i = 1; i < chkb;i++) { if (tp = vm_page_lookup(object, pi - i)) { if ((tp->flags & PG_BUSY) || (tp->flags & PG_CLEANCHK) == 0) break; if (tp->queue == PQ_CACHE) { tp->flags &= ~PG_CLEANCHK; break; } vm_page_test_dirty(tp); if ((tp->dirty & tp->valid) == 0) { tp->flags &= ~PG_CLEANCHK; break; } mab[ i - 1 ] = tp; maxb++; continue; } break; } } for(i = 0; i < maxb; i++) { int index = (maxb - i) - 1; ma[index] = mab[i]; ma[index]->flags |= PG_BUSY; ma[index]->flags &= ~PG_CLEANCHK; vm_page_protect(ma[index], VM_PROT_READ); } vm_page_protect(p, VM_PROT_READ); p->flags |= PG_BUSY; p->flags &= ~PG_CLEANCHK; ma[maxb] = p; for(i = 0; i < maxf; i++) { int index = (maxb + i) + 1; ma[index] = maf[i]; ma[index]->flags |= PG_BUSY; ma[index]->flags &= ~PG_CLEANCHK; vm_page_protect(ma[index], VM_PROT_READ); } runlen = maxb + maxf + 1; splx(s); vm_pageout_flush(ma, runlen, 0); goto rescan; } VOP_FSYNC(vp, NULL, syncio, curproc); if (lockflag) VOP_UNLOCK(vp); object->flags &= ~OBJ_CLEANING; return; } #ifdef not_used /* XXX I cannot tell if this should be an exported symbol */ /* * vm_object_deactivate_pages * * Deactivate all pages in the specified object. (Keep its pages * in memory even though it is no longer referenced.) * * The object must be locked. */ static void vm_object_deactivate_pages(object) register vm_object_t object; { register vm_page_t p, next; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); vm_page_deactivate(p); } } #endif /* * Trim the object cache to size. */ static void vm_object_cache_trim() { register vm_object_t object; while (vm_object_cached > vm_object_cache_max) { object = TAILQ_FIRST(&vm_object_cached_list); vm_object_reference(object); pager_cache(object, FALSE); } } /* * vm_object_pmap_copy: * * Makes all physical pages in the specified * object range copy-on-write. No writeable * references to these pages should remain. * * The object must *not* be locked.
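vm_object_page_clean() hands vm_pageout_flush() whole clusters: up to maxb dirty neighbours collected behind the target page and up to maxf ahead of it, laid out low-to-high in ma[] with the target at ma[maxb]. A user-space model of just that index shuffle (the run lengths are invented):

#include <stdio.h>

#define NRUN 8			/* stands in for vm_pageout_page_count */

int
main(void)
{
	int maf[NRUN], mab[NRUN], ma[NRUN];
	int maxf = 3, maxb = 2;	/* invented run lengths */
	int pi = 10;		/* pindex of the target page */
	int i;

	/* forward neighbours pi+1, pi+2, ... as collected into maf[] */
	for (i = 0; i < maxf; i++)
		maf[i] = pi + 1 + i;
	/* backward neighbours pi-1, pi-2, ... as collected into mab[] */
	for (i = 0; i < maxb; i++)
		mab[i] = pi - 1 - i;

	/*
	 * Mirror the ma[] assembly: the backward run reversed in
	 * front, the target page at ma[maxb], the forward run behind.
	 */
	for (i = 0; i < maxb; i++)
		ma[(maxb - i) - 1] = mab[i];
	ma[maxb] = pi;
	for (i = 0; i < maxf; i++)
		ma[(maxb + i) + 1] = maf[i];

	for (i = 0; i < maxb + maxf + 1; i++)
		printf("%d ", ma[i]);		/* prints: 8 9 10 11 12 13 */
	printf("\n");
	return (0);
}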
*/ void vm_object_pmap_copy(object, start, end) register vm_object_t object; register vm_pindex_t start; register vm_pindex_t end; { register vm_page_t p; if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0) return; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { vm_page_protect(p, VM_PROT_READ); } object->flags &= ~OBJ_WRITEABLE; } /* * vm_object_pmap_remove: * * Removes all physical pages in the specified * object range from all physical maps. * * The object must *not* be locked. */ void vm_object_pmap_remove(object, start, end) register vm_object_t object; register vm_pindex_t start; register vm_pindex_t end; { register vm_page_t p; if (object == NULL) return; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (p->pindex >= start && p->pindex < end) vm_page_protect(p, VM_PROT_NONE); } } /* * vm_object_madvise: * * Implements the madvise function at the object/page level. */ void vm_object_madvise(object, pindex, count, advise) vm_object_t object; vm_pindex_t pindex; int count; int advise; { vm_pindex_t end; vm_page_t m; if (object == NULL) return; end = pindex + count; for (; pindex < end; pindex += 1) { m = vm_page_lookup(object, pindex); /* * If the page is busy or not in a normal active state, * we skip it. Things can break if we mess with pages * in any of the below states. */ if (m == NULL || m->busy || (m->flags & PG_BUSY) || m->hold_count || m->wire_count || m->valid != VM_PAGE_BITS_ALL) continue; if (advise == MADV_WILLNEED) { if (m->queue != PQ_ACTIVE) vm_page_activate(m); } else if ((advise == MADV_DONTNEED) || ((advise == MADV_FREE) && ((object->type != OBJT_DEFAULT) && (object->type != OBJT_SWAP)))) { vm_page_deactivate(m); } else if (advise == MADV_FREE) { /* * Force a demand-zero on next ref */ if (object->type == OBJT_SWAP) swap_pager_dmzspace(object, m->pindex, 1); vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); } } } /* * vm_object_shadow: * * Create a new object which is backed by the * specified existing object range. The source * object reference is deallocated. * * The new object and offset into that object * are returned in the source parameters. */ void vm_object_shadow(object, offset, length) vm_object_t *object; /* IN/OUT */ vm_ooffset_t *offset; /* IN/OUT */ vm_size_t length; { register vm_object_t source; register vm_object_t result; source = *object; /* * Allocate a new object with the given length */ if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL) panic("vm_object_shadow: no object for shadowing"); /* * The new object shadows the source object, adding a reference to it. * Our caller changes his reference to point to the new object, * removing a reference to the source object. Net result: no change * of reference count. */ result->backing_object = source; if (source) { TAILQ_INSERT_TAIL(&source->shadow_head, result, shadow_list); ++source->shadow_count; } /* * Store the offset into the source object, and fix up the offset into * the new object. */ result->backing_object_offset = *offset; /* * Return the new things */ *offset = 0; *object = result; } /* * this version of collapse allows the operation to occur earlier and * when paging_in_progress is true for an object... This is not a complete * operation, but should plug 99.9% of the rest of the leaks. 
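vm_object_madvise() above maps the three advice values onto page-queue actions. A compact stand-alone restatement of that decision; the enum names are invented, the semantics follow the loop above:

#include <stdio.h>

enum advice { WILLNEED, DONTNEED, FREE };
enum action { ACTIVATE, DEACTIVATE, TOSS };

/*
 * anon_backed: object is OBJT_DEFAULT or OBJT_SWAP; only those can
 * really discard a page on MADV_FREE (demand-zero on the next touch).
 */
static enum action
madvise_action(enum advice adv, int anon_backed)
{
	if (adv == WILLNEED)
		return (ACTIVATE);
	if (adv == DONTNEED || (adv == FREE && !anon_backed))
		return (DEACTIVATE);
	return (TOSS);		/* MADV_FREE on anonymous memory */
}

int
main(void)
{
	printf("%d %d %d\n",
	    madvise_action(WILLNEED, 1),	/* ACTIVATE */
	    madvise_action(FREE, 0),		/* DEACTIVATE */
	    madvise_action(FREE, 1));		/* TOSS */
	return (0);
}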
*/ static void vm_object_qcollapse(object) register vm_object_t object; { register vm_object_t backing_object; register vm_pindex_t backing_offset_index, paging_offset_index; vm_pindex_t backing_object_paging_offset_index; vm_pindex_t new_pindex; register vm_page_t p, pp; register vm_size_t size; backing_object = object->backing_object; if (backing_object->ref_count != 1) return; backing_object->ref_count += 2; backing_offset_index = OFF_TO_IDX(object->backing_object_offset); backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset); paging_offset_index = OFF_TO_IDX(object->paging_offset); size = object->size; p = TAILQ_FIRST(&backing_object->memq); while (p) { vm_page_t next; next = TAILQ_NEXT(p, listq); if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) || (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) { p = next; continue; } new_pindex = p->pindex - backing_offset_index; if (p->pindex < backing_offset_index || new_pindex >= size) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index+p->pindex, 1); vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { pp = vm_page_lookup(object, new_pindex); if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, paging_offset_index + new_pindex, NULL, NULL))) { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index + p->pindex, 1); vm_page_protect(p, VM_PROT_NONE); vm_page_free(p); } else { if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, backing_object_paging_offset_index + p->pindex, 1); vm_page_rename(p, object, new_pindex); p->dirty = VM_PAGE_BITS_ALL; } } p = next; } backing_object->ref_count -= 2; } /* * vm_object_collapse: * * Collapse an object with the object backing it. * Pages in the backing object are moved into the * parent, and the backing object is deallocated. */ void vm_object_collapse(object) vm_object_t object; { vm_object_t backing_object; vm_ooffset_t backing_offset; vm_size_t size; vm_pindex_t new_pindex, backing_offset_index; vm_page_t p, pp; while (TRUE) { /* * Verify that the conditions are right for collapse: * * The object exists and no pages in it are currently being paged * out. */ if (object == NULL) return; /* * Make sure there is a backing object. */ if ((backing_object = object->backing_object) == NULL) return; /* * we check the backing object first, because it is most likely * not collapsable. */ if (backing_object->handle != NULL || (backing_object->type != OBJT_DEFAULT && backing_object->type != OBJT_SWAP) || (backing_object->flags & OBJ_DEAD) || object->handle != NULL || (object->type != OBJT_DEFAULT && object->type != OBJT_SWAP) || (object->flags & OBJ_DEAD)) { return; } if (object->paging_in_progress != 0 || backing_object->paging_in_progress != 0) { vm_object_qcollapse(object); return; } /* * We know that we can either collapse the backing object (if * the parent is the only reference to it) or (perhaps) remove * the parent's reference to it. */ backing_offset = object->backing_object_offset; backing_offset_index = OFF_TO_IDX(backing_offset); size = object->size; /* * If there is exactly one reference to the backing object, we * can collapse it into the parent. */ if (backing_object->ref_count == 1) { backing_object->flags |= OBJ_DEAD; /* * We can collapse the backing object. * * Move all in-memory pages from backing_object to the * parent. 
Pages that have been paged out will be * overwritten by any of the parent's pages that * shadow them. */ while ((p = TAILQ_FIRST(&backing_object->memq)) != 0) { new_pindex = p->pindex - backing_offset_index; /* * If the parent has a page here, or if this * page falls outside the parent, dispose of * it. * * Otherwise, move it as planned. */ if (p->pindex < backing_offset_index || new_pindex >= size) { vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } else { pp = vm_page_lookup(object, new_pindex); if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) { vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } else { vm_page_rename(p, object, new_pindex); } } } /* * Move the pager from backing_object to object. */ if (backing_object->type == OBJT_SWAP) { backing_object->paging_in_progress++; if (object->type == OBJT_SWAP) { object->paging_in_progress++; /* * copy shadow object pages into ours * and destroy unneeded pages in * shadow object. */ swap_pager_copy( backing_object, OFF_TO_IDX(backing_object->paging_offset), object, OFF_TO_IDX(object->paging_offset), OFF_TO_IDX(object->backing_object_offset)); vm_object_pip_wakeup(object); } else { object->paging_in_progress++; /* * move the shadow backing_object's pager data to * "object" and convert "object" type to OBJT_SWAP. */ object->type = OBJT_SWAP; object->un_pager.swp.swp_nblocks = backing_object->un_pager.swp.swp_nblocks; object->un_pager.swp.swp_allocsize = backing_object->un_pager.swp.swp_allocsize; object->un_pager.swp.swp_blocks = backing_object->un_pager.swp.swp_blocks; object->un_pager.swp.swp_poip = /* XXX */ backing_object->un_pager.swp.swp_poip; object->paging_offset = backing_object->paging_offset + backing_offset; TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list); /* * Convert backing object from OBJT_SWAP to * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is * actually necessary. */ backing_object->type = OBJT_DEFAULT; TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list); /* * free unnecessary blocks */ swap_pager_freespace(object, 0, OFF_TO_IDX(object->paging_offset)); vm_object_pip_wakeup(object); } vm_object_pip_wakeup(backing_object); } /* * Object now shadows whatever backing_object did. * Note that the reference to backing_object->backing_object * moves from within backing_object to within object. */ TAILQ_REMOVE(&object->backing_object->shadow_head, object, shadow_list); --object->backing_object->shadow_count; if (backing_object->backing_object) { TAILQ_REMOVE(&backing_object->backing_object->shadow_head, backing_object, shadow_list); --backing_object->backing_object->shadow_count; } object->backing_object = backing_object->backing_object; if (object->backing_object) { TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, object, shadow_list); ++object->backing_object->shadow_count; } object->backing_object_offset += backing_object->backing_object_offset; /* * Discard backing_object. * * Since the backing object has no pages, no pager left, * and no object references within it, all that is * necessary is to dispose of it. */ TAILQ_REMOVE(&vm_object_list, backing_object, object_list); vm_object_count--; free((caddr_t) backing_object, M_VMOBJ); object_collapses++; } else { /* * If all of the pages in the backing object are * shadowed by the parent object, the parent object no * longer has to shadow the backing object; it can * shadow the next one in the chain. 
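When pages migrate from the backing object into the parent, their indices shift down by the backing offset, and pages falling outside the parent's window are simply freed. A small stand-alone check of that translation (offsets and sizes invented):

#include <stdio.h>

/*
 * Mirrors the collapse loop: a backing-object page at pindex p maps
 * to parent index p - backing_offset_index, and only indices in
 * [0, parent_size) survive the move.
 */
static int
translate(long p, long backing_offset_index, long parent_size,
    long *new_pindex)
{
	*new_pindex = p - backing_offset_index;
	if (p < backing_offset_index || *new_pindex >= parent_size)
		return (0);	/* outside the parent: free the page */
	return (1);		/* rename the page into the parent */
}

int
main(void)
{
	long np;

	/* a 16-page parent shadowing the backing object at offset 4 */
	printf("%d\n", translate(2, 4, 16, &np));	/* 0: below window */
	printf("%d\n", translate(10, 4, 16, &np));	/* 1: lands at 6 */
	printf("%d\n", translate(25, 4, 16, &np));	/* 0: past the end */
	return (0);
}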
* * The backing object must not be paged out - we'd have * to check all of the paged-out pages, as well. */ if (backing_object->type != OBJT_DEFAULT) { return; } /* * Should have a check for a 'small' number of pages * here. */ for (p = TAILQ_FIRST(&backing_object->memq); p; p = TAILQ_NEXT(p, listq)) { new_pindex = p->pindex - backing_offset_index; /* * If the parent has a page here, or if this * page falls outside the parent, keep going. * * Otherwise, the backing_object must be left in * the chain. */ if (p->pindex >= backing_offset_index && new_pindex <= size) { pp = vm_page_lookup(object, new_pindex); if ((pp == NULL || pp->valid == 0) && !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) { /* * Page still needed. Can't go any * further. */ return; } } } /* * Make the parent shadow the next object in the * chain. Deallocating backing_object will not remove * it, since its reference count is at least 2. */ TAILQ_REMOVE(&object->backing_object->shadow_head, object, shadow_list); --object->backing_object->shadow_count; vm_object_reference(object->backing_object = backing_object->backing_object); if (object->backing_object) { TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, object, shadow_list); ++object->backing_object->shadow_count; } object->backing_object_offset += backing_object->backing_object_offset; /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish; * so we don't need to call vm_object_deallocate. */ if (backing_object->ref_count == 1) printf("should have called obj deallocate\n"); backing_object->ref_count--; object_bypasses++; } /* * Try again with this object's new backing object. */ } } /* * vm_object_page_remove: [internal] * * Removes all physical pages in the specified * object range from the object's list of pages. * * The object must be locked. 
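The bypass case above drops the parent's reference only if every backing page inside the parent's window is already obscured. A stand-alone sketch of that scan; the parent_has[] array stands in for the vm_page_lookup()/vm_pager_has_page() pair and is an assumption of this sketch:

#include <stdio.h>

/* Returns 1 when the backing object can be bypassed. */
static int
can_bypass(const long *backing_pindex, int npages,
    long backing_offset_index, long size, const int *parent_has)
{
	int i;

	for (i = 0; i < npages; i++) {
		long p = backing_pindex[i];
		long new_pindex = p - backing_offset_index;

		if (p >= backing_offset_index && new_pindex < size &&
		    !parent_has[new_pindex])
			return (0);		/* page still needed */
	}
	return (1);
}

int
main(void)
{
	long pages[3] = { 4, 5, 9 };	/* backing pages; indices invented */
	int parent_has[8] = { 1, 1, 0, 0, 0, 1, 0, 0 };

	/* window at offset 4, parent 8 pages: 4, 5, 9 map to 0, 1, 5 */
	printf("%d\n", can_bypass(pages, 3, 4, 8, parent_has));	/* 1 */
	parent_has[1] = 0;
	printf("%d\n", can_bypass(pages, 3, 4, 8, parent_has));	/* 0 */
	return (0);
}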
*/ void vm_object_page_remove(object, start, end, clean_only) register vm_object_t object; register vm_pindex_t start; register vm_pindex_t end; boolean_t clean_only; { register vm_page_t p, next; unsigned int size; int s; if (object == NULL) return; object->paging_in_progress++; again: size = end - start; if (size > 4 || size >= object->size / 4) { for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); if ((start <= p->pindex) && (p->pindex < end)) { if (p->wire_count != 0) { vm_page_protect(p, VM_PROT_NONE); p->valid = 0; continue; } /* * The busy flags are only cleared at * interrupt -- minimize the spl transitions */ if ((p->flags & PG_BUSY) || p->busy) { s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep(p, PVM, "vmopar", 0); splx(s); goto again; } splx(s); } if (clean_only) { vm_page_test_dirty(p); if (p->valid & p->dirty) continue; } vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } } } else { while (size > 0) { if ((p = vm_page_lookup(object, start)) != 0) { if (p->wire_count != 0) { p->valid = 0; vm_page_protect(p, VM_PROT_NONE); start += 1; size -= 1; continue; } /* * The busy flags are only cleared at * interrupt -- minimize the spl transitions */ if ((p->flags & PG_BUSY) || p->busy) { s = splvm(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep(p, PVM, "vmopar", 0); splx(s); goto again; } splx(s); } if (clean_only) { vm_page_test_dirty(p); if (p->valid & p->dirty) { start += 1; size -= 1; continue; } } vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); vm_page_free(p); } start += 1; size -= 1; } } vm_object_pip_wakeup(object); } /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining * regions of memory into a single object. * * returns TRUE if objects were combined. * * NOTE: Only works at the moment if the second object is NULL - * if it's not, which object do we lock first? * * Parameters: * prev_object First object to coalesce * prev_offset Offset into prev_object * next_object Second object into coalesce * next_offset Offset into next_object * * prev_size Size of reference to prev_object * next_size Size of reference to next_object * * Conditions: * The object must *not* be locked. */ boolean_t vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size) register vm_object_t prev_object; vm_pindex_t prev_pindex; vm_size_t prev_size, next_size; { vm_size_t newsize; if (prev_object == NULL) { return (TRUE); } if (prev_object->type != OBJT_DEFAULT) { return (FALSE); } /* * Try to collapse the object first */ vm_object_collapse(prev_object); /* * Can't coalesce if: . more than one reference . paged out . shadows * another object . has a copy elsewhere (any of which mean that the * pages not mapped to prev_entry may be in use anyway) */ if (prev_object->ref_count > 1 || prev_object->backing_object != NULL) { return (FALSE); } prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; /* * Remove any pages that may still be in the object from a previous * deallocation. */ vm_object_page_remove(prev_object, prev_pindex + prev_size, prev_pindex + prev_size + next_size, FALSE); /* * Extend the object if necessary. 
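vm_object_coalesce() above only merges when the previous object is anonymous, singly referenced and unshadowed, and then simply grows it to cover the new range. Both the veto test and the size arithmetic in stand-alone form (values invented):

#include <stdio.h>

/* The three vetoes checked by vm_object_coalesce(). */
static int
can_coalesce(int type_is_default, int ref_count, int has_backing)
{
	return (type_is_default && ref_count <= 1 && !has_backing);
}

/* Once the checks pass, the object grows to cover the merged range. */
static unsigned long
coalesced_size(unsigned long cursize, unsigned long prev_pindex,
    unsigned long prev_size, unsigned long next_size)
{
	unsigned long newsize = prev_pindex + prev_size + next_size;

	return (newsize > cursize ? newsize : cursize);
}

int
main(void)
{
	printf("%d\n", can_coalesce(1, 1, 0));		/* 1: mergeable */
	printf("%d\n", can_coalesce(1, 2, 0));		/* 0: shared */
	printf("%lu\n", coalesced_size(24, 16, 8, 8));	/* grows to 32 */
	printf("%lu\n", coalesced_size(64, 16, 8, 8));	/* stays 64 */
	return (0);
}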
*/ newsize = prev_pindex + prev_size + next_size; if (newsize > prev_object->size) prev_object->size = newsize; return (TRUE); } #ifdef DDB static int _vm_object_in_map(map, object, entry) vm_map_t map; vm_object_t object; vm_map_entry_t entry; { vm_map_t tmpm; vm_map_entry_t tmpe; vm_object_t obj; int entcount; if (map == 0) return 0; if (entry == 0) { tmpe = map->header.next; entcount = map->nentries; while (entcount-- && (tmpe != &map->header)) { if( _vm_object_in_map(map, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (entry->is_sub_map || entry->is_a_map) { tmpm = entry->object.share_map; tmpe = tmpm->header.next; entcount = tmpm->nentries; while (entcount-- && tmpe != &tmpm->header) { if( _vm_object_in_map(tmpm, object, tmpe)) { return 1; } tmpe = tmpe->next; } } else if (obj = entry->object.vm_object) { for(; obj; obj=obj->backing_object) if( obj == object) { return 1; } } return 0; } static int vm_object_in_map( object) vm_object_t object; { struct proc *p; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if( !p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if( _vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) return 1; } if( _vm_object_in_map( kernel_map, object, 0)) return 1; if( _vm_object_in_map( kmem_map, object, 0)) return 1; if( _vm_object_in_map( pager_map, object, 0)) return 1; if( _vm_object_in_map( buffer_map, object, 0)) return 1; if( _vm_object_in_map( io_map, object, 0)) return 1; if( _vm_object_in_map( phys_map, object, 0)) return 1; if( _vm_object_in_map( mb_map, object, 0)) return 1; if( _vm_object_in_map( u_map, object, 0)) return 1; return 0; } #ifdef DDB static void DDB_vm_object_check() { vm_object_t object; /* * make sure that internal objs are in a map somewhere * and none have zero ref counts. 
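The DDB check above walks every map entry, recursing through share and sub-maps, and asks whether any entry's object chain reaches the object in question. A toy user-space version of that walk; the struct layout is invented and only the flat-entry leg is modelled:

#include <stddef.h>
#include <stdio.h>

struct tobj { struct tobj *backing; };
struct tentry {
	struct tentry *next;
	struct tobj *obj;	/* object chain, or NULL */
};

/*
 * Follow entry->obj through its backing chain, as the
 * "obj = obj->backing_object" leg of _vm_object_in_map() does.
 */
static int
entry_has(struct tentry *e, struct tobj *target)
{
	struct tobj *o;

	for (o = e->obj; o != NULL; o = o->backing)
		if (o == target)
			return (1);
	return (0);
}

static int
in_map(struct tentry *head, struct tobj *target)
{
	struct tentry *e;

	for (e = head; e != NULL; e = e->next)
		if (entry_has(e, target))
			return (1);
	return (0);
}

int
main(void)
{
	struct tobj base = { NULL }, shadow = { &base };
	struct tentry e2 = { NULL, &shadow }, e1 = { &e2, NULL };

	printf("%d\n", in_map(&e1, &base));	/* 1: reached via shadow */
	return (0);
}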
*/ for (object = TAILQ_FIRST(&vm_object_list); object != NULL; object = TAILQ_NEXT(object, object_list)) { if (object->handle == NULL && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { if (object->ref_count == 0) { printf("vmochk: internal obj has zero ref count: %d\n", object->size); } if (!vm_object_in_map(object)) { printf("vmochk: internal obj is not in a map: " "ref: %d, size: %d: 0x%x, backing_object: 0x%x\n", object->ref_count, object->size, object->size, object->backing_object); } } } } #endif /* DDB */ /* * vm_object_print: [ debug ] */ void vm_object_print(iobject, full, dummy3, dummy4) /* db_expr_t */ int iobject; boolean_t full; /* db_expr_t */ int dummy3; char *dummy4; { vm_object_t object = (vm_object_t)iobject; /* XXX */ register vm_page_t p; register int count; if (object == NULL) return; iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", (int) object, (int) object->size, object->resident_page_count, object->ref_count); printf("offset=0x%x, backing_object=(0x%x)+0x%x\n", (int) object->paging_offset, (int) object->backing_object, (int) object->backing_object_offset); printf("cache: next=%p, prev=%p\n", TAILQ_NEXT(object, cached_list), TAILQ_PREV(object, cached_list)); if (!full) return; indent += 2; count = 0; for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) { if (count == 0) iprintf("memory:="); else if (count == 6) { printf("\n"); iprintf(" ..."); count = 0; } else printf(","); count++; printf("(off=0x%lx,page=0x%lx)", (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p)); } if (count != 0) printf("\n"); indent -= 2; } #endif /* DDB */ Index: head/sys/vm/vm_object.h =================================================================== --- head/sys/vm/vm_object.h (revision 17760) +++ head/sys/vm/vm_object.h (revision 17761) @@ -1,187 +1,188 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_object.h 8.3 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.h,v 1.27 1996/03/02 02:54:23 dyson Exp $ + * $Id: vm_object.h,v 1.28 1996/05/19 07:36:50 dyson Exp $ */ /* * Virtual memory object module definitions. */ #ifndef _VM_OBJECT_ #define _VM_OBJECT_ #ifdef KERNEL #include /* XXX for wakeup() */ #endif enum obj_type { OBJT_DEFAULT, OBJT_SWAP, OBJT_VNODE, OBJT_DEVICE }; typedef enum obj_type objtype_t; /* * Types defined: * * vm_object_t Virtual memory object. */ struct vm_object { TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ TAILQ_ENTRY(vm_object) cached_list; /* list of cached (persistent) objects */ TAILQ_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ TAILQ_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ TAILQ_HEAD(, vm_page) memq; /* list of resident pages */ objtype_t type; /* type of pager */ vm_size_t size; /* Object size */ int ref_count; /* How many refs?? 
*/ int shadow_count; /* how many objects that this is a shadow for */ u_short flags; /* see below */ u_short paging_in_progress; /* Paging (in or out) so don't collapse or destroy */ u_short behavior; /* see below */ int resident_page_count; /* number of resident pages */ vm_ooffset_t paging_offset; /* Offset into paging space */ struct vm_object *backing_object; /* object that I'm a shadow of */ vm_ooffset_t backing_object_offset;/* Offset in backing object */ vm_offset_t last_read; /* last read in object -- detect seq behavior */ TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */ void *handle; union { struct { off_t vnp_size; /* Current size of file */ } vnp; struct { TAILQ_HEAD(, vm_page) devp_pglist; /* list of pages allocated */ } devp; struct { int swp_nblocks; int swp_allocsize; struct swblock *swp_blocks; short swp_poip; } swp; } un_pager; }; /* * Flags */ #define OBJ_CANPERSIST 0x0001 /* allow to persist */ #define OBJ_ACTIVE 0x0004 /* active objects */ #define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ #define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ #define OBJ_WRITEABLE 0x0080 /* object has been made writable */ #define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */ #define OBJ_CLEANING 0x0200 +#define OBJ_VFS_REF 0x0400 /* object is refed by vfs layer */ #define OBJ_NORMAL 0x0 /* default behavior */ #define OBJ_SEQUENTIAL 0x1 /* expect sequential accesses */ #define OBJ_RANDOM 0x2 /* expect random accesses */ #define IDX_TO_OFF(idx) (((vm_ooffset_t)(idx)) << PAGE_SHIFT) #define OFF_TO_IDX(off) ((vm_pindex_t)(((vm_ooffset_t)(off)) >> PAGE_SHIFT)) #ifdef KERNEL extern int vm_object_cache_max; TAILQ_HEAD(object_q, vm_object); extern struct object_q vm_object_cached_list; /* list of objects persisting */ extern int vm_object_cached; /* size of cached list */ extern struct object_q vm_object_list; /* list of allocated objects */ extern long vm_object_count; /* count of all objects */ /* lock for object list and count */ extern vm_object_t kernel_object; /* the single kernel object */ extern vm_object_t kmem_object; #endif /* KERNEL */ #ifdef KERNEL static __inline void vm_object_pip_wakeup(vm_object_t object) { object->paging_in_progress--; if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) { object->flags &= ~OBJ_PIPWNT; wakeup(object); } } vm_object_t vm_object_allocate __P((objtype_t, vm_size_t)); void vm_object_cache_clear __P((void)); boolean_t vm_object_coalesce __P((vm_object_t, vm_pindex_t, vm_size_t, vm_size_t)); void vm_object_collapse __P((vm_object_t)); void vm_object_copy __P((vm_object_t, vm_pindex_t, vm_object_t *, vm_pindex_t *, boolean_t *)); void vm_object_deallocate __P((vm_object_t)); void vm_object_init __P((void)); void vm_object_page_clean __P((vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t, boolean_t)); void vm_object_page_remove __P((vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t)); void vm_object_pmap_copy __P((vm_object_t, vm_pindex_t, vm_pindex_t)); void vm_object_pmap_remove __P((vm_object_t, vm_pindex_t, vm_pindex_t)); void vm_object_reference __P((vm_object_t)); void vm_object_shadow __P((vm_object_t *, vm_ooffset_t *, vm_size_t)); void vm_object_madvise __P((vm_object_t, vm_pindex_t, int, int)); #endif /* KERNEL */ #endif /* _VM_OBJECT_ */ Index: head/sys/vm/vnode_pager.c =================================================================== --- head/sys/vm/vnode_pager.c (revision 17760) +++ head/sys/vm/vnode_pager.c (revision 17761) @@ -1,964 +1,964 @@ /* * Copyright (c) 
1990 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * Copyright (c) 1993, 1994 John S. Dyson * Copyright (c) 1995, David Greenman * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.61 1996/07/27 03:24:10 dyson Exp $ + * $Id: vnode_pager.c,v 1.62 1996/07/30 03:08:21 dyson Exp $ */ /* * Page to/from files (vnodes). */ /* * TODO: * Implement VOP_GETPAGES/PUTPAGES interface for filesystems. Will * greatly re-simplify the vnode_pager. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static vm_offset_t vnode_pager_addr __P((struct vnode *vp, vm_ooffset_t address, int *run)); static void vnode_pager_iodone __P((struct buf *bp)); static int vnode_pager_input_smlfs __P((vm_object_t object, vm_page_t m)); static int vnode_pager_input_old __P((vm_object_t object, vm_page_t m)); static void vnode_pager_dealloc __P((vm_object_t)); static int vnode_pager_getpages __P((vm_object_t, vm_page_t *, int, int)); static int vnode_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *)); static boolean_t vnode_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *)); struct pagerops vnodepagerops = { NULL, vnode_pager_alloc, vnode_pager_dealloc, vnode_pager_getpages, vnode_pager_putpages, vnode_pager_haspage, NULL }; static int vnode_pager_leaf_getpages __P((vm_object_t object, vm_page_t *m, int count, int reqpage)); static int vnode_pager_leaf_putpages __P((vm_object_t object, vm_page_t *m, int count, boolean_t sync, int *rtvals)); /* * Allocate (or lookup) pager for a vnode. 
* Handle is a vnode pointer. */ vm_object_t vnode_pager_alloc(handle, size, prot, offset) void *handle; vm_size_t size; vm_prot_t prot; vm_ooffset_t offset; { vm_object_t object; struct vnode *vp; /* * Pageout to vnode, no can do yet. */ if (handle == NULL) return (NULL); vp = (struct vnode *) handle; /* * Prevent race condition when allocating the object. This * can happen with NFS vnodes since the nfsnode isn't locked. */ while (vp->v_flag & VOLOCK) { vp->v_flag |= VOWANT; tsleep(vp, PVM, "vnpobj", 0); } vp->v_flag |= VOLOCK; /* * If the object is being terminated, wait for it to * go away. */ while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) { tsleep(object, PVM, "vadead", 0); } if (object == NULL) { /* * And an object of the appropriate size */ object = vm_object_allocate(OBJT_VNODE, size); if (vp->v_type == VREG) object->flags = OBJ_CANPERSIST; else object->flags = 0; /* * Hold a reference to the vnode and initialize object data. */ - VREF(vp); + vp->v_usecount++; object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE; object->handle = handle; vp->v_object = object; } else { /* * vm_object_reference() will remove the object from the cache if * found and gain a reference to the object. */ vm_object_reference(object); } if (vp->v_type == VREG) vp->v_flag |= VVMIO; vp->v_flag &= ~VOLOCK; if (vp->v_flag & VOWANT) { vp->v_flag &= ~VOWANT; wakeup(vp); } return (object); } static void vnode_pager_dealloc(object) vm_object_t object; { register struct vnode *vp = object->handle; if (vp == NULL) panic("vnode_pager_dealloc: pager already dealloced"); if (object->paging_in_progress) { int s = splbio(); while (object->paging_in_progress) { object->flags |= OBJ_PIPWNT; tsleep(object, PVM, "vnpdea", 0); } splx(s); } object->handle = NULL; vp->v_object = NULL; vp->v_flag &= ~(VTEXT | VVMIO); vp->v_flag |= VAGE; vrele(vp); } static boolean_t vnode_pager_haspage(object, pindex, before, after) vm_object_t object; vm_pindex_t pindex; int *before; int *after; { struct vnode *vp = object->handle; daddr_t bn; int err; daddr_t reqblock; int poff; int bsize; int pagesperblock, blocksperpage; /* * If filesystem no longer mounted or offset beyond end of file we do * not have the page. */ if ((vp->v_mount == NULL) || (IDX_TO_OFF(pindex) >= object->un_pager.vnp.vnp_size)) return FALSE; bsize = vp->v_mount->mnt_stat.f_iosize; pagesperblock = bsize / PAGE_SIZE; blocksperpage = 0; if (pagesperblock > 0) { reqblock = pindex / pagesperblock; } else { blocksperpage = (PAGE_SIZE / bsize); reqblock = pindex * blocksperpage; } err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn, after, before); if (err) return TRUE; if ( bn == -1) return FALSE; if (pagesperblock > 0) { poff = pindex - (reqblock * pagesperblock); if (before) { *before *= pagesperblock; *before += poff; } if (after) { int numafter; *after *= pagesperblock; numafter = pagesperblock - (poff + 1); if (IDX_TO_OFF(pindex + numafter) > object->un_pager.vnp.vnp_size) { numafter = OFF_TO_IDX((object->un_pager.vnp.vnp_size - IDX_TO_OFF(pindex))); } *after += numafter; } } else { if (before) { *before /= blocksperpage; } if (after) { *after /= blocksperpage; } } return TRUE; } /* * Lets the VM system know about a change in size for a file. * We adjust our own internal size and flush any cached pages in * the associated object that are affected by the size change. * * Note: this routine may be invoked as a result of a pager put * operation (possibly at object termination time), so we must be careful. 
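vnode_pager_haspage() above converts a page index into a filesystem block (or into several blocks per page when the block size is smaller than a page) before calling VOP_BMAP(). The two conversion cases in stand-alone form, with PAGE_SIZE fixed at 4096 for illustration:

#include <stdio.h>

#define PAGE_SIZE 4096

/*
 * Which fs block (or first block) backs page `pindex` for a
 * filesystem block size of `bsize`?  Mirrors the pagesperblock /
 * blocksperpage split in vnode_pager_haspage().
 */
static long
req_block(long pindex, long bsize)
{
	if (bsize >= PAGE_SIZE)			/* N pages per block */
		return (pindex / (bsize / PAGE_SIZE));
	return (pindex * (PAGE_SIZE / bsize));	/* N blocks per page */
}

int
main(void)
{
	printf("%ld\n", req_block(10, 8192));	/* 8K blocks: block 5 */
	printf("%ld\n", req_block(10, 1024));	/* 1K blocks: block 40 */
	return (0);
}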
*/ void vnode_pager_setsize(vp, nsize) struct vnode *vp; vm_ooffset_t nsize; { vm_object_t object = vp->v_object; if (object == NULL) return; /* * Hasn't changed size */ if (nsize == object->un_pager.vnp.vnp_size) return; /* * File has shrunk. Toss any cached pages beyond the new EOF. */ if (nsize < object->un_pager.vnp.vnp_size) { vm_ooffset_t nsizerounded; nsizerounded = IDX_TO_OFF(OFF_TO_IDX(nsize + PAGE_MASK)); if (nsizerounded < object->un_pager.vnp.vnp_size) { vm_object_page_remove(object, OFF_TO_IDX(nsize + PAGE_MASK), OFF_TO_IDX(object->un_pager.vnp.vnp_size), FALSE); } /* * this gets rid of garbage at the end of a page that is now * only partially backed by the vnode... */ if (nsize & PAGE_MASK) { vm_offset_t kva; vm_page_t m; m = vm_page_lookup(object, OFF_TO_IDX(nsize)); if (m) { kva = vm_pager_map_page(m); bzero((caddr_t) kva + (nsize & PAGE_MASK), (int) (round_page(nsize) - nsize)); vm_pager_unmap_page(kva); } } } object->un_pager.vnp.vnp_size = nsize; object->size = OFF_TO_IDX(nsize + PAGE_MASK); } void vnode_pager_umount(mp) register struct mount *mp; { struct vnode *vp, *nvp; loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Vnode can be reclaimed by getnewvnode() while we * traverse the list. */ if (vp->v_mount != mp) goto loop; /* * Save the next pointer now since uncaching may terminate the * object and render vnode invalid */ nvp = vp->v_mntvnodes.le_next; if (vp->v_object != NULL) { VOP_LOCK(vp); vnode_pager_uncache(vp); VOP_UNLOCK(vp); } } } /* * Remove vnode associated object from the object cache. * This routine must be called with the vnode locked. * * XXX unlock the vnode. * We must do this since uncaching the object may result in its * destruction which may initiate paging activity which may necessitate * re-locking the vnode. 
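On truncation, vnode_pager_setsize() above tosses whole pages past the new EOF and zeroes the tail of the page the EOF now lands in. The rounding arithmetic in stand-alone form (4096-byte pages and the file size are assumptions of this sketch):

#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)

int
main(void)
{
	long nsize = 10000;	/* invented new EOF */

	/* first page index wholly past the new EOF, as passed to
	 * vm_object_page_remove() above */
	long first_gone = (nsize + PAGE_MASK) / PAGE_SIZE;

	/* bytes to bzero() in the now partially backed page */
	long zero_from = nsize & PAGE_MASK;
	long zero_len  = PAGE_SIZE - zero_from;

	printf("remove pages from index %ld\n", first_gone);	/* 3 */
	if (nsize & PAGE_MASK)
		printf("zero %ld bytes at offset %ld of page %ld\n",
		    zero_len, zero_from, nsize / PAGE_SIZE);	/* page 2 */
	return (0);
}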
/*
 * Remove the vnode-associated object from the object cache.
 * This routine must be called with the vnode locked.
 *
 * XXX we must unlock the vnode around the call to pager_cache().
 * Uncaching the object may result in its destruction, which may
 * initiate paging activity, which may necessitate re-locking the vnode.
 */
void
vnode_pager_uncache(vp)
	struct vnode *vp;
{
	vm_object_t object;

	/*
	 * Not a mapped vnode
	 */
	object = vp->v_object;
	if (object == NULL)
		return;

	vm_object_reference(object);
	VOP_UNLOCK(vp);
	pager_cache(object, FALSE);
	VOP_LOCK(vp);
	return;
}

void
vnode_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}

/*
 * calculate the linear (byte) disk address of specified virtual
 * file address
 */
static vm_offset_t
vnode_pager_addr(vp, address, run)
	struct vnode *vp;
	vm_ooffset_t address;
	int *run;
{
	int rtaddress;
	int bsize;
	daddr_t block;
	struct vnode *rtvp;
	int err;
	daddr_t vblock;
	int voffset;

	if ((int) address < 0)
		return -1;

	if (vp->v_mount == NULL)
		return -1;

	bsize = vp->v_mount->mnt_stat.f_iosize;
	vblock = address / bsize;
	voffset = address % bsize;

	err = VOP_BMAP(vp, vblock, &rtvp, &block, run, NULL);

	if (err || (block == -1))
		rtaddress = -1;
	else {
		rtaddress = block + voffset / DEV_BSIZE;
		if (run) {
			*run += 1;
			*run *= bsize / PAGE_SIZE;
			*run -= voffset / PAGE_SIZE;
		}
	}

	return rtaddress;
}

/*
 * interrupt routine for I/O completion
 */
static void
vnode_pager_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * small block filesystem vnode pager input
 */
static int
vnode_pager_input_smlfs(object, m)
	vm_object_t object;
	vm_page_t m;
{
	int i;
	int s;
	struct vnode *dp, *vp;
	struct buf *bp;
	vm_offset_t kva;
	int fileaddr;
	vm_offset_t bsize;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	VOP_BMAP(vp, 0, &dp, 0, NULL, NULL);

	kva = vm_pager_map_page(m);

	for (i = 0; i < PAGE_SIZE / bsize; i++) {

		if ((vm_page_bits(IDX_TO_OFF(m->pindex) + i * bsize, bsize) & m->valid))
			continue;

		fileaddr = vnode_pager_addr(vp,
		    IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
		if (fileaddr != -1) {
			bp = getpbuf();

			/* build a minimal buffer header */
			bp->b_flags = B_BUSY | B_READ | B_CALL;
			bp->b_iodone = vnode_pager_iodone;
			bp->b_proc = curproc;
			bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
			if (bp->b_rcred != NOCRED)
				crhold(bp->b_rcred);
			if (bp->b_wcred != NOCRED)
				crhold(bp->b_wcred);
			bp->b_un.b_addr = (caddr_t) kva + i * bsize;
			bp->b_blkno = fileaddr;
			pbgetvp(dp, bp);
			bp->b_bcount = bsize;
			bp->b_bufsize = bsize;

			/* do the input */
			VOP_STRATEGY(bp);

			/* we definitely need to be at splbio here */

			s = splbio();
			while ((bp->b_flags & B_DONE) == 0) {
				tsleep(bp, PVM, "vnsrd", 0);
			}
			splx(s);
			if ((bp->b_flags & B_ERROR) != 0)
				error = EIO;

			/*
			 * free the buffer header back to the swap buffer pool
			 */
			relpbuf(bp);
			if (error)
				break;

			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
		} else {
			vm_page_set_validclean(m, (i * bsize) & PAGE_MASK, bsize);
			bzero((caddr_t) kva + i * bsize, bsize);
		}
	}
	vm_pager_unmap_page(kva);
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->flags &= ~PG_ZERO;
	if (error) {
		return VM_PAGER_ERROR;
	}
	return VM_PAGER_OK;
}
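#ifdef notdef
/*
 * A sketch of the translation performed by vnode_pager_addr() above: the
 * file byte offset is split into a logical filesystem block plus an
 * offset within that block; VOP_BMAP() supplies the device block for the
 * logical block (passed in here as devblock, an assumption of this demo),
 * and the in-block offset is converted to 512-byte (DEV_BSIZE) sectors.
 */
static long
example_file_byte_to_sector(long fileoff, long bsize, long devblock)
{
	long voffset = fileoff % bsize;	/* byte offset inside the fs block */

	return (devblock + voffset / 512);	/* whole sectors into the block */
}
#endif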
/*
 * old style vnode pager input routine
 */
static int
vnode_pager_input_old(object, m)
	vm_object_t object;
	vm_page_t m;
{
	struct uio auio;
	struct iovec aiov;
	int error;
	int size;
	vm_offset_t kva;

	error = 0;

	/*
	 * Return failure if beyond current EOF
	 */
	if (IDX_TO_OFF(m->pindex) >= object->un_pager.vnp.vnp_size) {
		return VM_PAGER_BAD;
	} else {
		size = PAGE_SIZE;
		if (IDX_TO_OFF(m->pindex) + size > object->un_pager.vnp.vnp_size)
			size = object->un_pager.vnp.vnp_size - IDX_TO_OFF(m->pindex);

		/*
		 * Allocate a kernel virtual address and initialize so that
		 * we can use VOP_READ/WRITE routines.
		 */
		kva = vm_pager_map_page(m);

		aiov.iov_base = (caddr_t) kva;
		aiov.iov_len = size;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = IDX_TO_OFF(m->pindex);
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_READ;
		auio.uio_resid = size;
		auio.uio_procp = (struct proc *) 0;

		error = VOP_READ(object->handle, &auio, 0, curproc->p_ucred);
		if (!error) {
			register int count = size - auio.uio_resid;

			if (count == 0)
				error = EINVAL;
			else if (count != PAGE_SIZE)
				bzero((caddr_t) kva + count, PAGE_SIZE - count);
		}
		vm_pager_unmap_page(kva);
	}
	pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	m->dirty = 0;
	m->flags &= ~PG_ZERO;
	return error ? VM_PAGER_ERROR : VM_PAGER_OK;
}

/*
 * generic vnode pager input routine
 */
static int
vnode_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	int rtval;
	struct vnode *vp;

	vp = object->handle;
	rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0);
	if (rtval == EOPNOTSUPP)
		return vnode_pager_leaf_getpages(object, m, count, reqpage);
	else
		return rtval;
}
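#ifdef notdef
/*
 * A sketch of the dispatch idiom in vnode_pager_getpages() above: the
 * filesystem gets first refusal via its VOP, and only an EOPNOTSUPP
 * answer falls through to the generic leaf implementation.  The function
 * pointer names here are hypothetical.
 */
static int
example_dispatch(int (*fs_op)(void *), int (*generic_op)(void *), void *arg)
{
	int rtval = fs_op(arg);

	if (rtval == EOPNOTSUPP)	/* fs declined; use the generic path */
		return (generic_op(arg));
	return (rtval);
}
#endif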
static int
vnode_pager_leaf_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count;
	int reqpage;
{
	vm_offset_t kva;
	off_t foff;
	int i, size, bsize, first, firstaddr;
	struct vnode *dp, *vp;
	int runpg;
	int runend;
	struct buf *bp;
	int s;
	int error = 0;

	vp = object->handle;
	if (vp->v_mount == NULL)
		return VM_PAGER_BAD;

	bsize = vp->v_mount->mnt_stat.f_iosize;

	/* get the UNDERLYING device for the file with VOP_BMAP() */

	/*
	 * originally, we did not check for an error return value -- assuming
	 * an fs always has a bmap entry point -- that assumption is wrong!!!
	 */
	foff = IDX_TO_OFF(m[reqpage]->pindex);

	/*
	 * if we can't bmap, use old VOP code
	 */
	if (VOP_BMAP(vp, 0, &dp, 0, NULL, NULL)) {
		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_old(object, m[reqpage]);

		/*
		 * if the blocksize is smaller than a page size, then use
		 * special small filesystem code.  NFS sometimes has a small
		 * blocksize, but it can handle large reads itself.
		 */
	} else if ((PAGE_SIZE / bsize) > 1 &&
	    (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {

		for (i = 0; i < count; i++) {
			if (i != reqpage) {
				vnode_pager_freepage(m[i]);
			}
		}
		cnt.v_vnodein++;
		cnt.v_vnodepgsin++;
		return vnode_pager_input_smlfs(object, m[reqpage]);
	}
	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block
	 * then, the entire page is valid --
	 */
	if (m[reqpage]->valid) {
		m[reqpage]->valid = VM_PAGE_BITS_ALL;
		for (i = 0; i < count; i++) {
			if (i != reqpage)
				vnode_pager_freepage(m[i]);
		}
		return VM_PAGER_OK;
	}
	/*
	 * here on direct device I/O
	 */
	firstaddr = -1;
	/*
	 * calculate the run that includes the required page
	 */
	for (first = 0, i = 0; i < count; i = runend) {
		firstaddr = vnode_pager_addr(vp,
		    IDX_TO_OFF(m[i]->pindex), &runpg);
		if (firstaddr == -1) {
			if (i == reqpage && foff < object->un_pager.vnp.vnp_size) {
				panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d",
				    firstaddr, foff, object->un_pager.vnp.vnp_size);
			}
			vnode_pager_freepage(m[i]);
			runend = i + 1;
			first = runend;
			continue;
		}
		runend = i + runpg;
		if (runend <= reqpage) {
			int j;
			for (j = i; j < runend; j++) {
				vnode_pager_freepage(m[j]);
			}
		} else {
			if (runpg < (count - first)) {
				for (i = first + runpg; i < count; i++)
					vnode_pager_freepage(m[i]);
				count = first + runpg;
			}
			break;
		}
		first = runend;
	}

	/*
	 * the first and last page have been calculated now, move input pages
	 * to be zero based...
	 */
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
		}
		count -= first;
		reqpage -= first;
	}

	/*
	 * calculate the file virtual address for the transfer
	 */
	foff = IDX_TO_OFF(m[0]->pindex);

	/*
	 * calculate the size of the transfer
	 */
	size = count * PAGE_SIZE;
	if ((foff + size) > object->un_pager.vnp.vnp_size)
		size = object->un_pager.vnp.vnp_size - foff;

	/*
	 * round up physical size for real devices
	 */
	if (dp->v_type == VBLK || dp->v_type == VCHR)
		size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);

	bp = getpbuf();
	kva = (vm_offset_t) bp->b_data;

	/*
	 * and map the pages to be read into the kva
	 */
	pmap_qenter(kva, m, count);

	/* build a minimal buffer header */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = vnode_pager_iodone;
	/* B_PHYS is not set, but it is nice to fill this in */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = firstaddr;
	pbgetvp(dp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += count;

	/* do the input */
	VOP_STRATEGY(bp);

	s = splbio();
	/* we definitely need to be at splbio here */

	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "vnread", 0);
	}
	splx(s);
	if ((bp->b_flags & B_ERROR) != 0)
		error = EIO;

	if (!error) {
		if (size != count * PAGE_SIZE)
			bzero((caddr_t) kva + size, PAGE_SIZE * count - size);
	}
	pmap_qremove(kva, count);

	/*
	 * free the buffer header back to the swap buffer pool
	 */
	relpbuf(bp);

	for (i = 0; i < count; i++) {
		pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
		m[i]->dirty = 0;
		m[i]->valid = VM_PAGE_BITS_ALL;
		m[i]->flags &= ~PG_ZERO;
		if (i != reqpage) {

			/*
			 * whether or not to leave the page activated is up in
			 * the air, but we should put the page on a page queue
			 * somewhere. (it already is in the object). Result:
			 * It appears that empirical results show that
			 * deactivating pages is best.
			 */

			/*
			 * just in case someone was asking for this page we
			 * now tell them that it is ok to use
			 */
			if (!error) {
				vm_page_deactivate(m[i]);
				PAGE_WAKEUP(m[i]);
			} else {
				vnode_pager_freepage(m[i]);
			}
		}
	}
	if (error) {
		printf("vnode_pager_getpages: I/O read error\n");
	}
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}

static int
vnode_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int rtval;
	struct vnode *vp;

	vp = object->handle;
	rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0);
	if (rtval == EOPNOTSUPP)
		return vnode_pager_leaf_putpages(object, m, count, sync, rtvals);
	else
		return rtval;
}
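#ifdef notdef
/*
 * A sketch of the compaction step in vnode_pager_leaf_getpages() above:
 * after pages outside the contiguous run have been freed, the surviving
 * run [first, count) is slid down to start at index 0 and reqpage is
 * rebased to match.  Plain ints stand in for vm_page_t pointers.
 */
static void
example_rebase_run(int *m, int *countp, int *reqpagep, int first)
{
	int i;

	if (first != 0) {
		for (i = first; i < *countp; i++)
			m[i - first] = m[i];	/* slide the run to index 0 */
		*countp -= first;
		*reqpagep -= first;
	}
}
#endif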
/*
 * generic vnode pager output routine
 */
static int
vnode_pager_leaf_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	int i;

	struct vnode *vp;
	int maxsize, ncount;
	vm_ooffset_t poffset;
	struct uio auio;
	struct iovec aiov;
	int error;

	vp = object->handle;
	for (i = 0; i < count; i++)
		rtvals[i] = VM_PAGER_AGAIN;

	if ((int) m[0]->pindex < 0) {
		printf("vnode_pager_putpages: attempt to write meta-data!!! -- 0x%x(%x)\n",
		    m[0]->pindex, m[0]->dirty);
		rtvals[0] = VM_PAGER_BAD;
		return VM_PAGER_BAD;
	}

	maxsize = count * PAGE_SIZE;
	ncount = count;

	poffset = IDX_TO_OFF(m[0]->pindex);
	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > poffset)
			maxsize = object->un_pager.vnp.vnp_size - poffset;
		else
			maxsize = 0;
		ncount = btoc(maxsize);
		if (ncount < count) {
			for (i = ncount; i < count; i++) {
				rtvals[i] = VM_PAGER_BAD;
			}
#ifdef BOGUS
			if (ncount == 0) {
				printf("vnode_pager_putpages: write past end of file: %d, %lu\n",
				    poffset,
				    (unsigned long) object->un_pager.vnp.vnp_size);
				return rtvals[0];
			}
#endif
		}
	}

	for (i = 0; i < count; i++) {
		m[i]->busy++;
		m[i]->flags &= ~PG_BUSY;
	}

	aiov.iov_base = (caddr_t) 0;
	aiov.iov_len = maxsize;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = poffset;
	auio.uio_segflg = UIO_NOCOPY;
	auio.uio_rw = UIO_WRITE;
	auio.uio_resid = maxsize;
	auio.uio_procp = (struct proc *) 0;
	error = VOP_WRITE(vp, &auio, IO_VMIO|(sync?IO_SYNC:0), curproc->p_ucred);
	cnt.v_vnodeout++;
	cnt.v_vnodepgsout += ncount;

	if (error) {
		printf("vnode_pager_putpages: I/O error %d\n", error);
	}
	if (auio.uio_resid) {
		printf("vnode_pager_putpages: residual I/O %d at %ld\n",
		    auio.uio_resid, m[0]->pindex);
	}
	for (i = 0; i < count; i++) {
		m[i]->busy--;
		if (i < ncount) {
			rtvals[i] = VM_PAGER_OK;
		}
		if ((m[i]->busy == 0) && (m[i]->flags & PG_WANTED))
			wakeup(m[i]);
	}
	return rtvals[0];
}

struct vnode *
vnode_pager_lock(object)
	vm_object_t object;
{

	for (; object != NULL; object = object->backing_object) {
		if (object->type != OBJT_VNODE)
			continue;

		VOP_LOCK(object->handle);
		return object->handle;
	}
	return NULL;
}
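#ifdef notdef
/*
 * A sketch of the EOF clipping in vnode_pager_leaf_putpages() above: a
 * write that would extend past the VM-known file size is trimmed to
 * vnp_size, and the page count is recomputed from the clipped byte count,
 * rounding up to whole pages the way btoc() does.  A 4096-byte page is
 * assumed and the names are illustrative.
 */
static void
example_clip_write(long poffset, long vnp_size, long *maxsizep, long *ncountp)
{
	if (*maxsizep + poffset > vnp_size) {
		if (vnp_size > poffset)
			*maxsizep = vnp_size - poffset;	/* clip to EOF */
		else
			*maxsizep = 0;		/* write is wholly past EOF */
		/* bytes-to-pages, rounded up (cf. btoc()) */
		*ncountp = (*maxsizep + 4096 - 1) / 4096;
	}
}
#endif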