Index: head/sys/coda/coda_vfsops.c =================================================================== --- head/sys/coda/coda_vfsops.c (revision 71698) +++ head/sys/coda/coda_vfsops.c (revision 71699) @@ -1,552 +1,540 @@ /* * * Coda: an Experimental Distributed File System * Release 3.1 * * Copyright (c) 1987-1998 Carnegie Mellon University * All Rights Reserved * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation, and * that credit is given to Carnegie Mellon University in all documents * and publicity pertaining to direct or indirect use of this code or its * derivatives. * * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF * ANY DERIVATIVE WORK. * * Carnegie Mellon encourages users of this software to return any * improvements or extensions that they make, and to grant Carnegie * Mellon the rights to redistribute these changes without encumbrance. * * @(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ * $FreeBSD$ * */ /* * Mach Operating System * Copyright (c) 1989 Carnegie-Mellon University * All rights reserved. The CMU software License Agreement specifies * the terms and conditions for use and redistribution. */ /* * This code was written for the Coda file system at Carnegie Mellon * University. Contributers include David Steere, James Kistler, and * M. Satyanarayanan. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_CODA, "CODA storage", "Various Coda Structures"); int codadebug = 0; int coda_vfsop_print_entry = 0; #define ENTRY if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__FUNCTION__)) struct vnode *coda_ctlvp; struct coda_mntinfo coda_mnttbl[NVCODA]; /* indexed by minor device number */ /* structure to keep statistics of internally generated/satisfied calls */ struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE]; #define MARK_ENTRY(op) (coda_vfsopstats[op].entries++) #define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++) #define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++) #define MRAK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++) extern int coda_nc_initialized; /* Set if cache has been initialized */ extern int vc_nb_open __P((dev_t, int, int, struct proc *)); int coda_vfsopstats_init(void) { register int i; for (i=0;ini_vp; if (error) { MARK_INT_FAIL(CODA_MOUNT_STATS); return (error); } if (dvp->v_type != VCHR) { MARK_INT_FAIL(CODA_MOUNT_STATS); vrele(dvp); NDFREE(ndp, NDF_ONLY_PNBUF); return(ENXIO); } dev = dvp->v_rdev; vrele(dvp); NDFREE(ndp, NDF_ONLY_PNBUF); /* * See if the device table matches our expectations. */ if (devsw(dev)->d_open != vc_nb_open) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } if (minor(dev) >= NVCODA || minor(dev) < 0) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } /* * Initialize the mount record and link it to the vfs struct */ mi = &coda_mnttbl[minor(dev)]; if (!VC_OPEN(&mi->mi_vcomm)) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENODEV); } /* No initialization (here) of mi_vcomm! */ vfsp->mnt_data = (qaddr_t)mi; vfs_getnewfsid (vfsp); mi->mi_vfsp = vfsp; /* * Make a root vnode to placate the Vnode interface, but don't * actually make the CODA_ROOT call to venus until the first call * to coda_root in case a server is down while venus is starting. 
*/ rootfid.Volume = 0; rootfid.Vnode = 0; rootfid.Unique = 0; cp = make_coda_node(&rootfid, vfsp, VDIR); rootvp = CTOV(cp); rootvp->v_flag |= VROOT; ctlfid.Volume = CTL_VOL; ctlfid.Vnode = CTL_VNO; ctlfid.Unique = CTL_UNI; /* cp = make_coda_node(&ctlfid, vfsp, VCHR); The above code seems to cause a loop in the cnode links. I don't totally understand when it happens, it is caught when closing down the system. */ cp = make_coda_node(&ctlfid, 0, VCHR); coda_ctlvp = CTOV(cp); /* Add vfs and rootvp to chain of vfs hanging off mntinfo */ mi->mi_vfsp = vfsp; mi->mi_rootvp = rootvp; /* set filesystem block size */ vfsp->mnt_stat.f_bsize = 8192; /* XXX -JJK */ /* Set f_iosize. XXX -- inamura@isl.ntt.co.jp. For vnode_pager_haspage() references. The value should be obtained from underlying UFS. */ /* Checked UFS. iosize is set as 8192 */ vfsp->mnt_stat.f_iosize = 8192; /* error is currently guaranteed to be zero, but in case some code changes... */ CODADEBUG(1, myprintf(("coda_mount returned %d\n",error));); if (error) MARK_INT_FAIL(CODA_MOUNT_STATS); else MARK_INT_SAT(CODA_MOUNT_STATS); return(error); } int coda_unmount(vfsp, mntflags, p) struct mount *vfsp; int mntflags; struct proc *p; { struct coda_mntinfo *mi = vftomi(vfsp); int active, error = 0; ENTRY; MARK_ENTRY(CODA_UMOUNT_STATS); if (!CODA_MOUNTED(vfsp)) { MARK_INT_FAIL(CODA_UMOUNT_STATS); return(EINVAL); } if (mi->mi_vfsp == vfsp) { /* We found the victim */ if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp))) return (EBUSY); /* Venus is still running */ #ifdef DEBUG printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp)); #endif vrele(mi->mi_rootvp); active = coda_kill(vfsp, NOT_DOWNCALL); mi->mi_rootvp->v_flag &= ~VROOT; error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE); printf("coda_unmount: active = %d, vflush active %d\n", active, error); error = 0; /* I'm going to take this out to allow lookups to go through. I'm * not sure it's important anyway. 
-- DCS 2/2/94 */ /* vfsp->VFS_DATA = NULL; */ /* No more vfsp's to hold onto */ mi->mi_vfsp = NULL; mi->mi_rootvp = NULL; if (error) MARK_INT_FAIL(CODA_UMOUNT_STATS); else MARK_INT_SAT(CODA_UMOUNT_STATS); return(error); } return (EINVAL); } /* * find root of cfs */ int coda_root(vfsp, vpp) struct mount *vfsp; struct vnode **vpp; { struct coda_mntinfo *mi = vftomi(vfsp); struct vnode **result; int error; struct proc *p = curproc; /* XXX - bnoble */ ViceFid VFid; - struct ucred* uc; ENTRY; MARK_ENTRY(CODA_ROOT_STATS); result = NULL; if (vfsp == mi->mi_vfsp) { if ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) || (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) || (VTOC(mi->mi_rootvp)->c_fid.Unique != 0)) { /* Found valid root. */ *vpp = mi->mi_rootvp; /* On Mach, this is vref. On NetBSD, VOP_LOCK */ #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); return(0); } } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = venus_root(vftomi(vfsp), uc, p, &VFid); - crfree(uc); + error = venus_root(vftomi(vfsp), p->p_ucred, p, &VFid); if (!error) { /* * Save the new rootfid in the cnode, and rehash the cnode into the * cnode hash with the new fid key. */ coda_unsave(VTOC(mi->mi_rootvp)); VTOC(mi->mi_rootvp)->c_fid = VFid; coda_save(VTOC(mi->mi_rootvp)); *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); goto exit; } else if (error == ENODEV || error == EINTR) { /* Gross hack here! */ /* * If Venus fails to respond to the CODA_ROOT call, coda_call returns * ENODEV. Return the uninitialized root vnode to allow vfs * operations such as unmount to continue. Without this hack, * there is no way to do an unmount if Venus dies before a * successful CODA_ROOT call is done. All vnode operations * will fail. 
*/ *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_FAIL(CODA_ROOT_STATS); error = 0; goto exit; } else { CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); ); MARK_INT_FAIL(CODA_ROOT_STATS); goto exit; } exit: return(error); } /* * Get file system statistics. */ int coda_nb_statfs(vfsp, sbp, p) register struct mount *vfsp; struct statfs *sbp; struct proc *p; { ENTRY; /* MARK_ENTRY(CODA_STATFS_STATS); */ if (!CODA_MOUNTED(vfsp)) { /* MARK_INT_FAIL(CODA_STATFS_STATS);*/ return(EINVAL); } bzero(sbp, sizeof(struct statfs)); /* XXX - what to do about f_flags, others? --bnoble */ /* Below This is what AFS does #define NB_SFS_SIZ 0x895440 */ /* Note: Normal fs's have a bsize of 0x400 == 1024 */ sbp->f_type = vfsp->mnt_vfc->vfc_typenum; sbp->f_bsize = 8192; /* XXX */ sbp->f_iosize = 8192; /* XXX */ #define NB_SFS_SIZ 0x8AB75D sbp->f_blocks = NB_SFS_SIZ; sbp->f_bfree = NB_SFS_SIZ; sbp->f_bavail = NB_SFS_SIZ; sbp->f_files = NB_SFS_SIZ; sbp->f_ffree = NB_SFS_SIZ; bcopy((caddr_t)&(vfsp->mnt_stat.f_fsid), (caddr_t)&(sbp->f_fsid), sizeof (fsid_t)); snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda"); snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA"); /* MARK_INT_SAT(CODA_STATFS_STATS); */ return(0); } /* * Flush any pending I/O. */ int coda_sync(vfsp, waitfor, cred, p) struct mount *vfsp; int waitfor; struct ucred *cred; struct proc *p; { ENTRY; MARK_ENTRY(CODA_SYNC_STATS); MARK_INT_SAT(CODA_SYNC_STATS); return(0); } /* * fhtovp is now what vget used to be in 4.3-derived systems. For * some silly reason, vget is now keyed by a 32 bit ino_t, rather than * a type-specific fid. 
*/ int coda_fhtovp(vfsp, fhp, nam, vpp, exflagsp, creadanonp) register struct mount *vfsp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **creadanonp; { struct cfid *cfid = (struct cfid *)fhp; struct cnode *cp = 0; int error; struct proc *p = curproc; /* XXX -mach */ ViceFid VFid; - struct ucred *uc; int vtype; ENTRY; MARK_ENTRY(CODA_VGET_STATS); /* Check for vget of control object. */ if (IS_CTL_FID(&cfid->cfid_fid)) { *vpp = coda_ctlvp; vref(coda_ctlvp); MARK_INT_SAT(CODA_VGET_STATS); return(0); } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, uc, p, &VFid, &vtype); - crfree(uc); + error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, p->p_ucred, p, &VFid, &vtype); if (error) { CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));) *vpp = (struct vnode *)0; } else { CODADEBUG(CODA_VGET, myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n", VFid.Volume, VFid.Vnode, VFid.Unique, vtype, error)); ) cp = make_coda_node(&VFid, vfsp, vtype); *vpp = CTOV(cp); } return(error); } /* * To allow for greater ease of use, some vnodes may be orphaned when * Venus dies. Certain operations should still be allowed to go * through, but without propagating ophan-ness. So this function will * get a new vnode for the file from the current run of Venus. */ int getNewVnode(vpp) struct vnode **vpp; { struct cfid cfid; struct coda_mntinfo *mi = vftomi((*vpp)->v_mount); ENTRY; cfid.cfid_len = (short)sizeof(ViceFid); cfid.cfid_fid = VTOC(*vpp)->c_fid; /* Structure assignment. */ /* XXX ? */ /* We're guessing that if set, the 1st element on the list is a * valid vnode to use. If not, return ENODEV as venus is dead. */ if (mi->mi_vfsp == NULL) return ENODEV; return coda_fhtovp(mi->mi_vfsp, (struct fid*)&cfid, NULL, vpp, NULL, NULL); } #include #include #include /* get the mount structure corresponding to a given device. Assume * device corresponds to a UFS. 
Return NULL if no device is found. */ struct mount *devtomp(dev) dev_t dev; { struct mount *mp; TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (((VFSTOUFS(mp))->um_dev == dev)) { /* mount corresponds to UFS and the device matches one we want */ return(mp); } } /* mount structure wasn't found */ return(NULL); } struct vfsops coda_vfsops = { coda_mount, vfs_stdstart, coda_unmount, coda_root, vfs_stdquotactl, coda_nb_statfs, coda_sync, vfs_stdvget, vfs_stdfhtovp, vfs_stdcheckexp, vfs_stdvptofh, vfs_stdinit, vfs_stduninit, vfs_stdextattrctl, }; VFS_SET(coda_vfsops, coda, VFCF_NETWORK); Index: head/sys/compat/linux/linux_file.c =================================================================== --- head/sys/compat/linux/linux_file.c (revision 71698) +++ head/sys/compat/linux/linux_file.c (revision 71699) @@ -1,894 +1,887 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef __alpha__ int linux_creat(struct proc *p, struct linux_creat_args *args) { struct open_args /* { char *path; int flags; int mode; } */ bsd_open_args; caddr_t sg; sg = stackgap_init(); CHECKALTCREAT(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): creat(%s, %d)\n", p->p_pid, args->path, args->mode); #endif bsd_open_args.path = args->path; bsd_open_args.mode = args->mode; bsd_open_args.flags = O_WRONLY | O_CREAT | O_TRUNC; return open(p, &bsd_open_args); } #endif /*!__alpha__*/ int linux_open(struct proc *p, struct linux_open_args *args) { struct open_args /* { char *path; int flags; int mode; } */ bsd_open_args; int error; caddr_t sg; sg = stackgap_init(); if (args->flags & LINUX_O_CREAT) CHECKALTCREAT(p, &sg, args->path); else CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): open(%s, 0x%x, 0x%x)\n", p->p_pid, args->path, args->flags, args->mode); #endif bsd_open_args.flags = 0; if (args->flags & LINUX_O_RDONLY) bsd_open_args.flags |= O_RDONLY; if (args->flags & LINUX_O_WRONLY) bsd_open_args.flags |= O_WRONLY; if (args->flags & LINUX_O_RDWR) bsd_open_args.flags |= O_RDWR; if (args->flags & LINUX_O_NDELAY) bsd_open_args.flags |= O_NONBLOCK; if (args->flags & LINUX_O_APPEND) bsd_open_args.flags |= O_APPEND; if (args->flags & LINUX_O_SYNC) 
bsd_open_args.flags |= O_FSYNC; if (args->flags & LINUX_O_NONBLOCK) bsd_open_args.flags |= O_NONBLOCK; if (args->flags & LINUX_FASYNC) bsd_open_args.flags |= O_ASYNC; if (args->flags & LINUX_O_CREAT) bsd_open_args.flags |= O_CREAT; if (args->flags & LINUX_O_TRUNC) bsd_open_args.flags |= O_TRUNC; if (args->flags & LINUX_O_EXCL) bsd_open_args.flags |= O_EXCL; if (args->flags & LINUX_O_NOCTTY) bsd_open_args.flags |= O_NOCTTY; bsd_open_args.path = args->path; bsd_open_args.mode = args->mode; error = open(p, &bsd_open_args); PROC_LOCK(p); if (!error && !(bsd_open_args.flags & O_NOCTTY) && SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { struct filedesc *fdp = p->p_fd; struct file *fp = fdp->fd_ofiles[p->p_retval[0]]; PROC_UNLOCK(p); if (fp->f_type == DTYPE_VNODE) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, p); } else PROC_UNLOCK(p); #ifdef DEBUG printf("Linux-emul(%d): open returns error %d\n", p->p_pid, error); #endif return error; } struct linux_flock { short l_type; short l_whence; linux_off_t l_start; linux_off_t l_len; linux_pid_t l_pid; }; static void linux_to_bsd_flock(struct linux_flock *linux_flock, struct flock *bsd_flock) { switch (linux_flock->l_type) { case LINUX_F_RDLCK: bsd_flock->l_type = F_RDLCK; break; case LINUX_F_WRLCK: bsd_flock->l_type = F_WRLCK; break; case LINUX_F_UNLCK: bsd_flock->l_type = F_UNLCK; break; default: bsd_flock->l_type = -1; break; } bsd_flock->l_whence = linux_flock->l_whence; bsd_flock->l_start = (off_t)linux_flock->l_start; bsd_flock->l_len = (off_t)linux_flock->l_len; bsd_flock->l_pid = (pid_t)linux_flock->l_pid; } static void bsd_to_linux_flock(struct flock *bsd_flock, struct linux_flock *linux_flock) { switch (bsd_flock->l_type) { case F_RDLCK: linux_flock->l_type = LINUX_F_RDLCK; break; case F_WRLCK: linux_flock->l_type = LINUX_F_WRLCK; break; case F_UNLCK: linux_flock->l_type = LINUX_F_UNLCK; break; } linux_flock->l_whence = bsd_flock->l_whence; linux_flock->l_start = (linux_off_t)bsd_flock->l_start; linux_flock->l_len = 
(linux_off_t)bsd_flock->l_len; linux_flock->l_pid = (linux_pid_t)bsd_flock->l_pid; } int linux_fcntl(struct proc *p, struct linux_fcntl_args *args) { int error, result; struct fcntl_args /* { int fd; int cmd; long arg; } */ fcntl_args; struct linux_flock linux_flock; struct flock *bsd_flock; struct filedesc *fdp; struct file *fp; caddr_t sg; sg = stackgap_init(); bsd_flock = (struct flock *)stackgap_alloc(&sg, sizeof(struct flock)); #ifdef DEBUG printf("Linux-emul(%ld): fcntl(%d, %08x, *)\n", (long)p->p_pid, args->fd, args->cmd); #endif fcntl_args.fd = args->fd; switch (args->cmd) { case LINUX_F_DUPFD: fcntl_args.cmd = F_DUPFD; fcntl_args.arg = args->arg; return fcntl(p, &fcntl_args); case LINUX_F_GETFD: fcntl_args.cmd = F_GETFD; return fcntl(p, &fcntl_args); case LINUX_F_SETFD: fcntl_args.cmd = F_SETFD; fcntl_args.arg = args->arg; return fcntl(p, &fcntl_args); case LINUX_F_GETFL: fcntl_args.cmd = F_GETFL; error = fcntl(p, &fcntl_args); result = p->p_retval[0]; p->p_retval[0] = 0; if (result & O_RDONLY) p->p_retval[0] |= LINUX_O_RDONLY; if (result & O_WRONLY) p->p_retval[0] |= LINUX_O_WRONLY; if (result & O_RDWR) p->p_retval[0] |= LINUX_O_RDWR; if (result & O_NDELAY) p->p_retval[0] |= LINUX_O_NONBLOCK; if (result & O_APPEND) p->p_retval[0] |= LINUX_O_APPEND; if (result & O_FSYNC) p->p_retval[0] |= LINUX_O_SYNC; if (result & O_ASYNC) p->p_retval[0] |= LINUX_FASYNC; return error; case LINUX_F_SETFL: fcntl_args.arg = 0; if (args->arg & LINUX_O_NDELAY) fcntl_args.arg |= O_NONBLOCK; if (args->arg & LINUX_O_APPEND) fcntl_args.arg |= O_APPEND; if (args->arg & LINUX_O_SYNC) fcntl_args.arg |= O_FSYNC; if (args->arg & LINUX_FASYNC) fcntl_args.arg |= O_ASYNC; fcntl_args.cmd = F_SETFL; return fcntl(p, &fcntl_args); case LINUX_F_GETLK: if ((error = copyin((caddr_t)args->arg, (caddr_t)&linux_flock, sizeof(struct linux_flock)))) return error; linux_to_bsd_flock(&linux_flock, bsd_flock); fcntl_args.cmd = F_GETLK; fcntl_args.arg = (long)bsd_flock; error = fcntl(p, &fcntl_args); if 
(error) return error; bsd_to_linux_flock(bsd_flock, &linux_flock); return copyout((caddr_t)&linux_flock, (caddr_t)args->arg, sizeof(struct linux_flock)); case LINUX_F_SETLK: if ((error = copyin((caddr_t)args->arg, (caddr_t)&linux_flock, sizeof(struct linux_flock)))) return error; linux_to_bsd_flock(&linux_flock, bsd_flock); fcntl_args.cmd = F_SETLK; fcntl_args.arg = (long)bsd_flock; return fcntl(p, &fcntl_args); case LINUX_F_SETLKW: if ((error = copyin((caddr_t)args->arg, (caddr_t)&linux_flock, sizeof(struct linux_flock)))) return error; linux_to_bsd_flock(&linux_flock, bsd_flock); fcntl_args.cmd = F_SETLKW; fcntl_args.arg = (long)bsd_flock; return fcntl(p, &fcntl_args); case LINUX_F_GETOWN: fcntl_args.cmd = F_GETOWN; return fcntl(p, &fcntl_args); case LINUX_F_SETOWN: /* * XXX some Linux applications depend on F_SETOWN having no * significant effect for pipes (SIGIO is not delivered for * pipes under Linux-2.2.35 at least). */ fdp = p->p_fd; if ((u_int)args->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[args->fd]) == NULL) return EBADF; if (fp->f_type == DTYPE_PIPE) return EINVAL; fcntl_args.cmd = F_SETOWN; fcntl_args.arg = args->arg; return fcntl(p, &fcntl_args); } return EINVAL; } int linux_lseek(struct proc *p, struct linux_lseek_args *args) { struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ tmp_args; int error; #ifdef DEBUG printf("Linux-emul(%ld): lseek(%d, %ld, %d)\n", (long)p->p_pid, args->fdes, args->off, args->whence); #endif tmp_args.fd = args->fdes; tmp_args.offset = (off_t)args->off; tmp_args.whence = args->whence; error = lseek(p, &tmp_args); return error; } #ifndef __alpha__ int linux_llseek(struct proc *p, struct linux_llseek_args *args) { struct lseek_args bsd_args; int error; off_t off; #ifdef DEBUG printf("Linux-emul(%d): llseek(%d, %d:%d, %d)\n", p->p_pid, args->fd, args->ohigh, args->olow, args->whence); #endif off = (args->olow) | (((off_t) args->ohigh) << 32); bsd_args.fd = args->fd; bsd_args.offset = off; bsd_args.whence 
= args->whence; if ((error = lseek(p, &bsd_args))) return error; if ((error = copyout(p->p_retval, (caddr_t)args->res, sizeof (off_t)))) return error; p->p_retval[0] = 0; return 0; } #endif /*!__alpha__*/ struct linux_dirent { long dino; linux_off_t doff; unsigned short dreclen; char dname[LINUX_NAME_MAX + 1]; }; #define LINUX_RECLEN(de,namlen) \ ALIGN((((char *)&(de)->dname - (char *)de) + (namlen) + 1)) #ifndef __alpha__ int linux_readdir(struct proc *p, struct linux_readdir_args *args) { struct linux_getdents_args lda; lda.fd = args->fd; lda.dent = args->dent; lda.count = 1; return linux_getdents(p, &lda); } #endif /*!__alpha__*/ int linux_getdents(struct proc *p, struct linux_getdents_args *args) { register struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* Linux-format */ int resid, linuxreclen=0; /* Linux-format */ struct file *fp; struct uio auio; struct iovec aiov; struct vattr va; off_t off; struct linux_dirent linux_dirent; int buflen, error, eofflag, nbytes, justone; u_long *cookies = NULL, *cookiep; int ncookies; - struct ucred *uc; #ifdef DEBUG printf("Linux-emul(%d): getdents(%d, *, %d)\n", p->p_pid, args->fd, args->count); #endif if ((error = getvnode(p->p_fd, args->fd, &fp)) != 0) { return (error); } if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *) fp->f_data; if (vp->v_type != VDIR) return (EINVAL); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_GETATTR(vp, &va, uc, p); - crfree(uc); - if (error) { + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) { return error; } nbytes = args->count; if (nbytes == 1) { nbytes = sizeof (struct linux_dirent); justone = 1; } else justone = 0; off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); again: aiov.iov_base 
= buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = p; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies); if (error) { goto out; } inp = buf; outp = (caddr_t) args->dent; resid = nbytes; if ((len = buflen - auio.uio_resid) <= 0) { goto eof; } cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { bdp = (struct dirent *) inp; len -= bdp->d_reclen; inp += bdp->d_reclen; cookiep++; ncookies--; } } while (len > 0) { if (cookiep && ncookies == 0) break; bdp = (struct dirent *) inp; reclen = bdp->d_reclen; if (reclen & 3) { printf("linux_readdir: reclen=%d\n", reclen); error = EFAULT; goto out; } if (bdp->d_fileno == 0) { inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; len -= reclen; continue; } linuxreclen = LINUX_RECLEN(&linux_dirent, bdp->d_namlen); if (reclen > len || resid < linuxreclen) { outp++; break; } linux_dirent.dino = (long) bdp->d_fileno; if (justone) { /* * old linux-style readdir usage. 
*/ linux_dirent.doff = (linux_off_t) linuxreclen; linux_dirent.dreclen = (u_short) bdp->d_namlen; } else { if (cookiep) linux_dirent.doff = (linux_off_t)*cookiep; else linux_dirent.doff = (linux_off_t)(off + reclen); linux_dirent.dreclen = (u_short) linuxreclen; } strcpy(linux_dirent.dname, bdp->d_name); if ((error = copyout((caddr_t)&linux_dirent, outp, linuxreclen))) { goto out; } inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; outp += linuxreclen; resid -= linuxreclen; len -= reclen; if (justone) break; } if (outp == (caddr_t) args->dent) goto again; fp->f_offset = off; if (justone) nbytes = resid + linuxreclen; eof: p->p_retval[0] = nbytes - resid; out: if (cookies) free(cookies, M_TEMP); VOP_UNLOCK(vp, 0, p); free(buf, M_TEMP); return error; } /* * These exist mainly for hooks for doing /compat/linux translation. */ int linux_access(struct proc *p, struct linux_access_args *args) { struct access_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): access(%s, %d)\n", p->p_pid, args->path, args->flags); #endif bsd.path = args->path; bsd.flags = args->flags; return access(p, &bsd); } int linux_unlink(struct proc *p, struct linux_unlink_args *args) { struct unlink_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): unlink(%s)\n", p->p_pid, args->path); #endif bsd.path = args->path; return unlink(p, &bsd); } int linux_chdir(struct proc *p, struct linux_chdir_args *args) { struct chdir_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): chdir(%s)\n", p->p_pid, args->path); #endif bsd.path = args->path; return chdir(p, &bsd); } int linux_chmod(struct proc *p, struct linux_chmod_args *args) { struct chmod_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): chmod(%s, %d)\n", p->p_pid, 
args->path, args->mode); #endif bsd.path = args->path; bsd.mode = args->mode; return chmod(p, &bsd); } int linux_chown(struct proc *p, struct linux_chown_args *args) { struct chown_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): chown(%s, %d, %d)\n", p->p_pid, args->path, args->uid, args->gid); #endif bsd.path = args->path; /* XXX size casts here */ bsd.uid = args->uid; bsd.gid = args->gid; return chown(p, &bsd); } int linux_lchown(struct proc *p, struct linux_lchown_args *args) { struct lchown_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): lchown(%s, %d, %d)\n", p->p_pid, args->path, args->uid, args->gid); #endif bsd.path = args->path; /* XXX size casts here */ bsd.uid = args->uid; bsd.gid = args->gid; return lchown(p, &bsd); } int linux_mkdir(struct proc *p, struct linux_mkdir_args *args) { struct mkdir_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTCREAT(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): mkdir(%s, %d)\n", p->p_pid, args->path, args->mode); #endif bsd.path = args->path; bsd.mode = args->mode; return mkdir(p, &bsd); } int linux_rmdir(struct proc *p, struct linux_rmdir_args *args) { struct rmdir_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): rmdir(%s)\n", p->p_pid, args->path); #endif bsd.path = args->path; return rmdir(p, &bsd); } int linux_rename(struct proc *p, struct linux_rename_args *args) { struct rename_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->from); CHECKALTCREAT(p, &sg, args->to); #ifdef DEBUG printf("Linux-emul(%d): rename(%s, %s)\n", p->p_pid, args->from, args->to); #endif bsd.from = args->from; bsd.to = args->to; return rename(p, &bsd); } int linux_symlink(struct proc *p, struct linux_symlink_args *args) { struct symlink_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, 
args->path); CHECKALTCREAT(p, &sg, args->to); #ifdef DEBUG printf("Linux-emul(%d): symlink(%s, %s)\n", p->p_pid, args->path, args->to); #endif bsd.path = args->path; bsd.link = args->to; return symlink(p, &bsd); } int linux_readlink(struct proc *p, struct linux_readlink_args *args) { struct readlink_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->name); #ifdef DEBUG printf("Linux-emul(%ld): readlink(%s, %p, %d)\n", (long)p->p_pid, args->name, (void *)args->buf, args->count); #endif bsd.path = args->name; bsd.buf = args->buf; bsd.count = args->count; return readlink(p, &bsd); } int linux_truncate(struct proc *p, struct linux_truncate_args *args) { struct truncate_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%d): truncate(%s, %ld)\n", p->p_pid, args->path, args->length); #endif bsd.path = args->path; bsd.length = args->length; return truncate(p, &bsd); } int linux_link(struct proc *p, struct linux_link_args *args) { struct link_args bsd; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->path); CHECKALTCREAT(p, &sg, args->to); #ifdef DEBUG printf("Linux-emul(%d): link(%s, %s)\n", p->p_pid, args->path, args->to); #endif bsd.path = args->path; bsd.link = args->to; return link(p, &bsd); } int linux_getcwd(struct proc *p, struct linux_getcwd_args *args) { struct __getcwd_args bsd; caddr_t sg; int error, len; #ifdef DEBUG printf("Linux-emul(%ld): getcwd(%p, %ld)\n", (long)p->p_pid, args->buf, args->bufsize); #endif sg = stackgap_init(); bsd.buf = stackgap_alloc(&sg, SPARE_USRSPACE); bsd.buflen = SPARE_USRSPACE; error = __getcwd(p, &bsd); if (!error) { len = strlen(bsd.buf) + 1; if (len <= args->bufsize) { p->p_retval[0] = len; error = copyout(bsd.buf, args->buf, len); } else error = ERANGE; } return (error); } #ifndef __alpha__ int linux_fdatasync(p, uap) struct proc *p; struct linux_fdatasync_args *uap; { struct fsync_args bsd; bsd.fd = uap->fd; return fsync(p, &bsd); } 
#endif /*!__alpha__*/ int linux_pread(p, uap) struct proc *p; struct linux_pread_args *uap; { struct pread_args bsd; bsd.fd = uap->fd; bsd.buf = uap->buf; bsd.nbyte = uap->nbyte; bsd.offset = uap->offset; return pread(p, &bsd); } int linux_pwrite(p, uap) struct proc *p; struct linux_pwrite_args *uap; { struct pwrite_args bsd; bsd.fd = uap->fd; bsd.buf = uap->buf; bsd.nbyte = uap->nbyte; bsd.offset = uap->offset; return pwrite(p, &bsd); } Index: head/sys/compat/linux/linux_misc.c =================================================================== --- head/sys/compat/linux/linux_misc.c (revision 71698) +++ head/sys/compat/linux/linux_misc.c (revision 71699) @@ -1,1159 +1,1147 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #include #endif #include #include #include #include #include #ifdef __alpha__ #define BSD_TO_LINUX_SIGNAL(sig) (sig) #else #define BSD_TO_LINUX_SIGNAL(sig) \ (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) #endif struct linux_rlimit { unsigned long rlim_cur; unsigned long rlim_max; }; #ifndef __alpha__ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, -1 }; #endif /*!__alpha__*/ #ifndef __alpha__ int linux_alarm(struct proc *p, struct linux_alarm_args *args) { struct itimerval it, old_it; struct timeval tv; int s; #ifdef DEBUG printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); #endif if (args->secs > 100000000) return EINVAL; it.it_value.tv_sec = (long)args->secs; it.it_value.tv_usec = 0; it.it_interval.tv_sec = 0; it.it_interval.tv_usec = 0; s = splsoftclock(); old_it = p->p_realtimer; getmicrouptime(&tv); if (timevalisset(&old_it.it_value)) callout_stop(&p->p_itcallout); if (it.it_value.tv_sec != 0) { callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p); timevaladd(&it.it_value, 
&tv); } p->p_realtimer = it; splx(s); if (timevalcmp(&old_it.it_value, &tv, >)) { timevalsub(&old_it.it_value, &tv); if (old_it.it_value.tv_usec != 0) old_it.it_value.tv_sec++; p->p_retval[0] = old_it.it_value.tv_sec; } return 0; } #endif /*!__alpha__*/ int linux_brk(struct proc *p, struct linux_brk_args *args) { #if 0 struct vmspace *vm = p->p_vmspace; vm_offset_t new, old; int error; if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) return EINVAL; if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) > p->p_rlimit[RLIMIT_DATA].rlim_cur) return ENOMEM; old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); new = round_page((vm_offset_t)args->dsend); p->p_retval[0] = old; if ((new-old) > 0) { if (swap_pager_full) return ENOMEM; error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return error; vm->vm_dsize += btoc((new-old)); p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); } return 0; #else struct vmspace *vm = p->p_vmspace; vm_offset_t new, old; struct obreak_args /* { char * nsize; } */ tmp; #ifdef DEBUG printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); #endif old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (vm_offset_t)args->dsend; tmp.nsize = (char *) new; if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) p->p_retval[0] = (long)new; else p->p_retval[0] = (long)old; return 0; #endif } int linux_uselib(struct proc *p, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; struct vattr attr; - struct ucred *uc; vm_offset_t vmaddr; unsigned long file_offset; vm_offset_t buffer; unsigned long bss_size; int error; caddr_t sg; int locked; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->library); #ifdef DEBUG printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library); #endif a_out = NULL; locked = 0; vp = NULL; NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p); error = 
namei(&ni); if (error) goto cleanup; vp = ni.ni_vp; /* * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed * without returning a vnode. */ if (vp == NULL) { error = ENOEXEC; /* ?? */ goto cleanup; } NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. */ locked++; /* * Writable? */ if (vp->v_writecount) { error = ETXTBSY; goto cleanup; } /* * Executable? */ - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_GETATTR(vp, &attr, uc, p); - if (error) { - crfree(uc); + error = VOP_GETATTR(vp, &attr, p->p_ucred, p); + if (error) goto cleanup; - } if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { error = ENOEXEC; - crfree(uc); goto cleanup; } /* * Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; - crfree(uc); goto cleanup; } /* * Can we access it? */ - error = VOP_ACCESS(vp, VEXEC, uc, p); - if (error) { - crfree(uc); + error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); + if (error) goto cleanup; - } - error = VOP_OPEN(vp, FREAD, uc, p); - crfree(uc); + error = VOP_OPEN(vp, FREAD, p->p_ucred, p); if (error) goto cleanup; /* * Lock no longer needed */ VOP_UNLOCK(vp, 0, p); locked = 0; /* * Pull in executable header into kernel_map */ error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); if (error) goto cleanup; /* * Is it a Linux binary ? */ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* While we are here, we should REALLY do some more checks */ /* * Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* * Check various fields in header for validity/bounds. 
*/ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* To protect p->p_rlimit in the if condition. */ mtx_assert(&Giant, MA_OWNED); /* * text/data/bss must not exceed limits * XXX: this is not complete. it should check current usage PLUS * the resources needed by this library. */ if (a_out->a_text > MAXTSIZ || a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { error = ENOMEM; goto cleanup; } /* * prevent more writers */ vp->v_flag |= VTEXT; /* * Check if file_offset page aligned,. * Currently we cannot handle misalinged file offsets, * and so we read in the entire image (what a waste). */ if (file_offset & PAGE_MASK) { #ifdef DEBUG printf("uselib: Non page aligned binary %lu\n", file_offset); #endif /* * Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; /* map file into kernel_map */ error = vm_mmap(kernel_map, &buffer, round_page(a_out->a_text + a_out->a_data + file_offset), VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, trunc_page(file_offset)); if (error) goto cleanup; /* copy from kernel VM space to user space */ error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), (caddr_t)vmaddr, a_out->a_text + a_out->a_data); /* release temporary kernel space */ vm_map_remove(kernel_map, buffer, buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); if (error) goto cleanup; } else { #ifdef DEBUG printf("uselib: Page aligned binary %lu\n", file_offset); #endif /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * 
Map it all into the process's space as a single copy-on-write * "data" segment. */ error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset); if (error) goto cleanup; } #ifdef DEBUG printf("mem=%08lx = %08lx %08lx\n", vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]); #endif if (bss_size != 0) { /* * Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* * allocate some 'anon' space */ error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: /* * Unlock vnode if needed */ if (locked) VOP_UNLOCK(vp, 0, p); /* * Release the kernel mapping. */ if (a_out) vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); return error; } int linux_newselect(struct proc *p, struct linux_newselect_args *args) { struct select_args bsa; struct timeval tv0, tv1, utv, *tvp; caddr_t sg; int error; #ifdef DEBUG printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", (long)p->p_pid, args->nfds, (void *)args->readfds, (void *)args->writefds, (void *)args->exceptfds, (void *)args->timeout); #endif error = 0; bsa.nd = args->nfds; bsa.in = args->readfds; bsa.ou = args->writefds; bsa.ex = args->exceptfds; bsa.tv = args->timeout; /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, &utv, sizeof(utv)))) goto select_out; #ifdef DEBUG printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", (long)p->p_pid, utv.tv_sec, utv.tv_usec); #endif if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. 
*/ sg = stackgap_init(); tvp = stackgap_alloc(&sg, sizeof(utv)); utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); if ((error = copyout(&utv, tvp, sizeof(utv)))) goto select_out; bsa.tv = tvp; } microtime(&tv0); } error = select(p, &bsa); #ifdef DEBUG printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error); #endif if (error) { /* * See fs/select.c in the Linux kernel. Without this, * Maelstrom doesn't work. */ if (error == ERESTART) error = EINTR; goto select_out; } if (args->timeout) { if (p->p_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); #ifdef DEBUG printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", (long)p->p_pid, utv.tv_sec, utv.tv_usec); #endif if ((error = copyout(&utv, args->timeout, sizeof(utv)))) goto select_out; } select_out: #ifdef DEBUG printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error); #endif return error; } int linux_getpgid(struct proc *p, struct linux_getpgid_args *args) { struct proc *curp; #ifdef DEBUG printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid); #endif if (args->pid != p->p_pid) { if (!(curp = pfind(args->pid))) return ESRCH; } else curp = p; p->p_retval[0] = curp->p_pgid; return 0; } int linux_mremap(struct proc *p, struct linux_mremap_args *args) { struct munmap_args /* { void *addr; size_t len; } */ bsd_args; int error = 0; #ifdef DEBUG printf("Linux-emul(%ld): mremap(%p, %08lx, %08lx, %08lx)\n", (long)p->p_pid, (void *)args->addr, (unsigned long)args->old_len, (unsigned long)args->new_len, (unsigned long)args->flags); #endif args->new_len = round_page(args->new_len); 
args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { p->p_retval[0] = 0; return ENOMEM; } if (args->new_len < args->old_len) { bsd_args.addr = args->addr + args->new_len; bsd_args.len = args->old_len - args->new_len; error = munmap(p, &bsd_args); } p->p_retval[0] = error ? 0 : (u_long)args->addr; return error; } int linux_msync(struct proc *p, struct linux_msync_args *args) { struct msync_args bsd_args; bsd_args.addr = args->addr; bsd_args.len = args->len; bsd_args.flags = 0; /* XXX ignore */ return msync(p, &bsd_args); } #ifndef __alpha__ int linux_time(struct proc *p, struct linux_time_args *args) { struct timeval tv; linux_time_t tm; int error; #ifdef DEBUG printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid); #endif microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) return error; p->p_retval[0] = tm; return 0; } #endif /*!__alpha__*/ struct linux_times_argv { long tms_utime; long tms_stime; long tms_cutime; long tms_cstime; }; #define CLK_TCK 100 /* Linux uses 100 */ #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) int linux_times(struct proc *p, struct linux_times_args *args) { struct timeval tv; struct linux_times_argv tms; struct rusage ru; int error; #ifdef DEBUG printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid); #endif mtx_enter(&sched_lock, MTX_SPIN); calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); mtx_exit(&sched_lock, MTX_SPIN); tms.tms_utime = CONVTCK(ru.ru_utime); tms.tms_stime = CONVTCK(ru.ru_stime); tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, sizeof(struct linux_times_argv)))) return error; microuptime(&tv); p->p_retval[0] = (int)CONVTCK(tv); return 0; } int linux_newuname(struct proc *p, struct linux_newuname_args *args) { struct linux_new_utsname utsname; char *osrelease, *osname; #ifdef DEBUG printf("Linux-emul(%ld): 
newuname(*)\n", (long)p->p_pid); #endif osname = linux_get_osname(p); osrelease = linux_get_osrelease(p); bzero(&utsname, sizeof(struct linux_new_utsname)); strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1); strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1); strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1); strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1); strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1); strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1); return (copyout((caddr_t)&utsname, (caddr_t)args->buf, sizeof(struct linux_new_utsname))); } struct linux_utimbuf { linux_time_t l_actime; linux_time_t l_modtime; }; int linux_utime(struct proc *p, struct linux_utime_args *args) { struct utimes_args /* { char *path; struct timeval *tptr; } */ bsdutimes; struct timeval tv[2], *tvp; struct linux_utimbuf lut; int error; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, args->fname); #ifdef DEBUG printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname); #endif if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) return error; tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; /* so that utimes can copyin */ tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); if (tvp == NULL) return (ENAMETOOLONG); if ((error = copyout(tv, tvp, sizeof(tv)))) return error; bsdutimes.tptr = tvp; } else bsdutimes.tptr = NULL; bsdutimes.path = args->fname; return utimes(p, &bsdutimes); } #define __WCLONE 0x80000000 #ifndef __alpha__ int linux_waitpid(struct proc *p, struct linux_waitpid_args *args) { struct wait_args /* { int pid; int *status; int options; struct rusage *rusage; } */ tmp; int error, tmpstat; #ifdef DEBUG printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", (long)p->p_pid, args->pid, (void *)args->status, args->options); #endif tmp.pid = args->pid; tmp.status = args->status; tmp.options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE 
should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) tmp.options |= WLINUXCLONE; tmp.rusage = NULL; if ((error = wait4(p, &tmp)) != 0) return error; if (args->status) { if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) return error; tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); return copyout(&tmpstat, args->status, sizeof(int)); } else return 0; } #endif /*!__alpha__*/ int linux_wait4(struct proc *p, struct linux_wait4_args *args) { struct wait_args /* { int pid; int *status; int options; struct rusage *rusage; } */ tmp; int error, tmpstat; #ifdef DEBUG printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", (long)p->p_pid, args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif tmp.pid = args->pid; tmp.status = args->status; tmp.options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) tmp.options |= WLINUXCLONE; tmp.rusage = args->rusage; if ((error = wait4(p, &tmp)) != 0) return error; SIGDELSET(p->p_siglist, SIGCHLD); if (args->status) { if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) return error; tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); return copyout(&tmpstat, args->status, sizeof(int)); } else return 0; } int linux_mknod(struct proc *p, struct linux_mknod_args *args) { caddr_t sg; struct mknod_args bsd_mknod; struct mkfifo_args bsd_mkfifo; sg = stackgap_init(); CHECKALTCREAT(p, &sg, args->path); #ifdef DEBUG printf("Linux-emul(%ld): mknod(%s, %d, %d)\n", (long)p->p_pid, args->path, args->mode, args->dev); #endif if (args->mode & 
S_IFIFO) { bsd_mkfifo.path = args->path; bsd_mkfifo.mode = args->mode; return mkfifo(p, &bsd_mkfifo); } else { bsd_mknod.path = args->path; bsd_mknod.mode = args->mode; bsd_mknod.dev = args->dev; return mknod(p, &bsd_mknod); } } /* * UGH! This is just about the dumbest idea I've ever heard!! */ int linux_personality(struct proc *p, struct linux_personality_args *args) { #ifdef DEBUG printf("Linux-emul(%ld): personality(%d)\n", (long)p->p_pid, args->per); #endif #ifndef __alpha__ if (args->per != 0) return EINVAL; #endif /* Yes Jim, it's still a Linux... */ p->p_retval[0] = 0; return 0; } /* * Wrappers for get/setitimer for debugging.. */ int linux_setitimer(struct proc *p, struct linux_setitimer_args *args) { struct setitimer_args bsa; struct itimerval foo; int error; #ifdef DEBUG printf("Linux-emul(%ld): setitimer(%p, %p)\n", (long)p->p_pid, (void *)args->itv, (void *)args->oitv); #endif bsa.which = args->which; bsa.itv = args->itv; bsa.oitv = args->oitv; if (args->itv) { if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, sizeof(foo)))) return error; #ifdef DEBUG printf("setitimer: value: sec: %ld, usec: %ld\n", foo.it_value.tv_sec, foo.it_value.tv_usec); printf("setitimer: interval: sec: %ld, usec: %ld\n", foo.it_interval.tv_sec, foo.it_interval.tv_usec); #endif } return setitimer(p, &bsa); } int linux_getitimer(struct proc *p, struct linux_getitimer_args *args) { struct getitimer_args bsa; #ifdef DEBUG printf("Linux-emul(%ld): getitimer(%p)\n", (long)p->p_pid, (void *)args->itv); #endif bsa.which = args->which; bsa.itv = args->itv; return getitimer(p, &bsa); } #ifndef __alpha__ int linux_nice(struct proc *p, struct linux_nice_args *args) { struct setpriority_args bsd_args; bsd_args.which = PRIO_PROCESS; bsd_args.who = 0; /* current process */ bsd_args.prio = args->inc; return setpriority(p, &bsd_args); } #endif /*!__alpha__*/ int linux_setgroups(p, uap) struct proc *p; struct linux_setgroups_args *uap; { struct pcred *pc; linux_gid_t linux_gidset[NGROUPS]; 
gid_t *bsd_gidset; int ngrp, error; pc = p->p_cred; ngrp = uap->gidsetsize; /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = suser(p)) != 0) return (error); if (ngrp >= NGROUPS) return (EINVAL); pc->pc_ucred = crcopy(pc->pc_ucred); if (ngrp > 0) { error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset, ngrp * sizeof(linux_gid_t)); if (error) return (error); pc->pc_ucred->cr_ngroups = ngrp + 1; bsd_gidset = pc->pc_ucred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else pc->pc_ucred->cr_ngroups = 1; setsugid(p); return (0); } int linux_getgroups(p, uap) struct proc *p; struct linux_getgroups_args *uap; { struct pcred *pc; linux_gid_t linux_gidset[NGROUPS]; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; pc = p->p_cred; bsd_gidset = pc->pc_ucred->cr_groups; bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. 
*/ if ((ngrp = uap->gidsetsize) == 0) { p->p_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, ngrp * sizeof(linux_gid_t)))) return (error); p->p_retval[0] = ngrp; return (0); } #ifndef __alpha__ int linux_setrlimit(p, uap) struct proc *p; struct linux_setrlimit_args *uap; { struct __setrlimit_args bsd; struct linux_rlimit rlim; int error; caddr_t sg = stackgap_init(); #ifdef DEBUG printf("Linux-emul(%ld): setrlimit(%d, %p)\n", (long)p->p_pid, uap->resource, (void *)uap->rlim); #endif if (uap->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); bsd.which = linux_to_bsd_resource[uap->resource]; if (bsd.which == -1) return (EINVAL); error = copyin(uap->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit)); bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur; bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max; return (setrlimit(p, &bsd)); } int linux_getrlimit(p, uap) struct proc *p; struct linux_getrlimit_args *uap; { struct __getrlimit_args bsd; struct linux_rlimit rlim; int error; caddr_t sg = stackgap_init(); #ifdef DEBUG printf("Linux-emul(%ld): getrlimit(%d, %p)\n", (long)p->p_pid, uap->resource, (void *)uap->rlim); #endif if (uap->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); bsd.which = linux_to_bsd_resource[uap->resource]; if (bsd.which == -1) return (EINVAL); bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit)); error = getrlimit(p, &bsd); if (error) return (error); rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; return (copyout(&rlim, uap->rlim, sizeof(rlim))); } #endif /*!__alpha__*/ int linux_sched_setscheduler(p, uap) struct proc *p; struct 
linux_sched_setscheduler_args *uap; { struct sched_setscheduler_args bsd; #ifdef DEBUG printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n", (long)p->p_pid, uap->pid, uap->policy, (const void *)uap->param); #endif switch (uap->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return EINVAL; } bsd.pid = uap->pid; bsd.param = uap->param; return sched_setscheduler(p, &bsd); } int linux_sched_getscheduler(p, uap) struct proc *p; struct linux_sched_getscheduler_args *uap; { struct sched_getscheduler_args bsd; int error; #ifdef DEBUG printf("Linux-emul(%ld): sched_getscheduler(%d)\n", (long)p->p_pid, uap->pid); #endif bsd.pid = uap->pid; error = sched_getscheduler(p, &bsd); switch (p->p_retval[0]) { case SCHED_OTHER: p->p_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: p->p_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: p->p_retval[0] = LINUX_SCHED_RR; break; } return error; } Index: head/sys/compat/linux/linux_util.c =================================================================== --- head/sys/compat/linux/linux_util.c (revision 71698) +++ head/sys/compat/linux/linux_util.c (revision 71699) @@ -1,193 +1,186 @@ /* * Copyright (c) 1994 Christos Zoulas * Copyright (c) 1995 Frank van der Linden * Copyright (c) 1995 Scott Bartram * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * from: svr4_util.c,v 1.5 1995/01/22 23:44:50 christos Exp * $FreeBSD$ */ #include #include #include #include #include #include #include const char linux_emul_path[] = "/compat/linux"; /* * Search an alternate path before passing pathname arguments on * to system calls. Useful for keeping a seperate 'emulation tree'. * * If cflag is set, we check if an attempt can be made to create * the named file, i.e. we check if the directory it should * be in exists. 
*/ int linux_emul_find(p, sgp, prefix, path, pbuf, cflag) struct proc *p; caddr_t *sgp; /* Pointer to stackgap memory */ const char *prefix; char *path; char **pbuf; int cflag; { struct nameidata nd; struct nameidata ndroot; struct vattr vat; struct vattr vatroot; - struct ucred *uc; int error; char *ptr, *buf, *cp; size_t sz, len; buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); *pbuf = path; for (ptr = buf; (*ptr = *prefix) != '\0'; ptr++, prefix++) continue; sz = MAXPATHLEN - (ptr - buf); /* * If sgp is not given then the path is already in kernel space */ if (sgp == NULL) error = copystr(path, ptr, sz, &len); else error = copyinstr(path, ptr, sz, &len); if (error) { free(buf, M_TEMP); return error; } if (*ptr != '/') { free(buf, M_TEMP); return EINVAL; } /* * We know that there is a / somewhere in this pathname. * Search backwards for it, to find the file's parent dir * to see if it exists in the alternate tree. If it does, * and we want to create a file (cflag is set). We don't * need to worry about the root comparison in this case. */ if (cflag) { for (cp = &ptr[len] - 1; *cp != '/'; cp--); *cp = '\0'; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, p); if ((error = namei(&nd)) != 0) { free(buf, M_TEMP); return error; } *cp = '/'; } else { NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, p); if ((error = namei(&nd)) != 0) { free(buf, M_TEMP); return error; } /* * We now compare the vnode of the linux_root to the one * vnode asked. If they resolve to be the same, then we * ignore the match so that the real root gets used. * This avoids the problem of traversing "../.." to find the * root directory and never finding it, because "/" resolves * to the emulation root directory. This is expensive :-( */ NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path, p); if ((error = namei(&ndroot)) != 0) { /* Cannot happen! 
*/ free(buf, M_TEMP); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); return error; } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = VOP_GETATTR(nd.ni_vp, &vat, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_GETATTR(nd.ni_vp, &vat, p->p_ucred, p)) != 0) { goto bad; } - error = VOP_GETATTR(ndroot.ni_vp, &vatroot, uc, p); - crfree(uc); - if (error != 0) { + if ((error = VOP_GETATTR(ndroot.ni_vp, &vatroot, p->p_ucred, p)) + != 0) { goto bad; } if (vat.va_fsid == vatroot.va_fsid && vat.va_fileid == vatroot.va_fileid) { error = ENOENT; goto bad; } } if (sgp == NULL) *pbuf = buf; else { sz = &ptr[len] - buf; *pbuf = stackgap_alloc(sgp, sz + 1); if (*pbuf != NULL) error = copyout(buf, *pbuf, sz); else error = ENAMETOOLONG; free(buf, M_TEMP); } NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); if (!cflag) { NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); } return error; bad: NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); free(buf, M_TEMP); return error; } Index: head/sys/compat/pecoff/imgact_pecoff.c =================================================================== --- head/sys/compat/pecoff/imgact_pecoff.c (revision 71698) +++ head/sys/compat/pecoff/imgact_pecoff.c (revision 71699) @@ -1,651 +1,641 @@ /* $NetBSD$ */ /* $FreeBSD$ */ /* * Copyright (c) 2000 Masaru OKI * Copyright (c) 1994, 1995, 1998 Scott Bartram * Copyright (c) 1994 Adam Glass * Copyright (c) 1993, 1994 Christopher G. Demetriou * * originally from NetBSD kern/exec_ecoff.c * * Copyright (c) 2000 Takanori Watanabe * Copyright (c) 2000 KUROSAWA Takahiro * Copyright (c) 1995-1996 Sen Schmidt * Copyright (c) 1996 Peter Wemm * All rights reserved. * * originally from FreeBSD kern/imgact_elf.c * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Masaru OKI. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_pecoff.h" #define PECOFF_PE_SIGNATURE "PE\0\0" static int pecoff_fixup(register_t **, struct image_params *); static int pecoff_coredump(register struct proc *, register struct vnode *, off_t); #ifndef PECOFF_DEBUG #define DPRINTF(a) #else #define DPRINTF(a) printf a #endif static struct sysentvec pecoff_sysvec = { SYS_MAXSYSCALL, sysent, 0, 0, 0, 0, 0, 0, pecoff_fixup, sendsig, sigcode, &szsigcode, 0, "FreeBSD PECoff", pecoff_coredump, NULL, MINSIGSTKSZ }; static const char signature[] = PECOFF_PE_SIGNATURE; static int exec_pecoff_coff_prep_omagic(struct image_params *, struct coff_filehdr *, struct coff_aouthdr *, int peoffs); static int exec_pecoff_coff_prep_nmagic(struct image_params *, struct coff_filehdr *, struct coff_aouthdr *, int peoffs); static int exec_pecoff_coff_prep_zmagic(struct image_params *, struct coff_filehdr *, struct coff_aouthdr *, int peoffs); static int exec_pecoff_coff_makecmds(struct image_params *, struct coff_filehdr *, int); static int pecoff_signature(struct proc *, struct vnode *, struct pecoff_dos_filehdr *); static int pecoff_read_from(struct proc *, struct vnode *, int, caddr_t, int); static int pecoff_load_section(struct proc * p, struct vmspace * vmspace, struct vnode * vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot); static int pecoff_fixup(register_t ** stack_base, struct image_params * imgp) { int len = sizeof(struct pecoff_args); struct pecoff_imghdr *ap; register_t *pos; pos = *stack_base + (imgp->argc + imgp->envc + 2); ap = (struct pecoff_imghdr *) imgp->auxargs; if (copyout(ap, pos, len)) { return NULL; } free(ap, M_TEMP); imgp->auxargs = NULL; (*stack_base)--; suword(*stack_base, (long) imgp->argc); 
return 0; } static int pecoff_coredump(register struct proc * p, register struct vnode * vp, off_t limit) { - register struct ucred *cred; + register struct ucred *cred = p->p_ucred; register struct vmspace *vm = p->p_vmspace; int error; #ifdef PECOFF_DEBUG struct vm_map *map; struct vm_map_entry *ent; struct reg regs; #endif if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >= limit) return (EFAULT); fill_kinfo_proc(p, &p->p_addr->u_kproc); #if PECOFF_DEBUG fill_regs(p, ®s); printf("EIP%x\n", regs.r_eip); printf("EAX%x EBX%x ECX%x EDI%x\n", regs.r_eax, regs.r_ebx, regs.r_ecx, regs.r_edi); map = &vm->vm_map; ent = &map->header; printf("%p %p %p\n", ent, ent->prev, ent->next); #endif - PROC_LOCK(p); - cred = p->p_ucred; - crhold(cred); - PROC_UNLOCK(p); error = cpu_coredump(p, vp, cred); if (error == 0) error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, (int) ctob(vm->vm_dsize), (off_t) ctob(UPAGES), UIO_USERSPACE, IO_NODELOCKED | IO_UNIT, cred, (int *) NULL, p); if (error == 0) error = vn_rdwr(UIO_WRITE, vp, (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)), round_page(ctob(vm->vm_ssize)), (off_t) ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE, IO_NODELOCKED | IO_UNIT, cred, (int *) NULL, p); - crfree(cred); return (error); } static int pecoff_load_section(struct proc * p, struct vmspace * vmspace, struct vnode * vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot) { size_t map_len; vm_offset_t map_addr; int error, rv; size_t copy_len; size_t copy_map_len; size_t copy_start; vm_object_t object; vm_offset_t copy_map_offset; vm_offset_t file_addr; vm_offset_t data_buf = 0; object = vp->v_object; error = 0; map_addr = trunc_page((vm_offset_t) vmaddr); file_addr = trunc_page(offset); DPRINTF(("SECARG:%x %p %x %x\n", offset, vmaddr, memsz, filsz)); if (file_addr != offset) { /* * The section is not on page boundary. We can't use * vm_map_insert(). Use copyin instead. 
*/ map_len = round_page(memsz); copy_len = filsz; copy_map_offset = file_addr; copy_map_len = round_page(offset + filsz) - file_addr; copy_start = offset - file_addr; DPRINTF(("offset=%x vmaddr=%x filsz=%x memsz=%x\n", offset, vmaddr, filsz, memsz)); DPRINTF(("map_len=%x copy_len=%x copy_map_offset=%x" " copy_map_len=%x copy_start=%x\n", map_len, copy_len, copy_map_offset, copy_map_len, copy_start)); } else { map_len = trunc_page(filsz); if (map_len != 0) { vm_object_reference(object); vm_map_lock(&vmspace->vm_map); rv = vm_map_insert(&vmspace->vm_map, object, file_addr, /* file offset */ map_addr, /* virtual start */ map_addr + map_len, /* virtual end */ prot, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); vm_map_unlock(&vmspace->vm_map); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); return EINVAL; } /* we can stop now if we've covered it all */ if (memsz == filsz) return 0; } copy_map_offset = trunc_page(offset + filsz); copy_map_len = PAGE_SIZE; copy_start = 0; copy_len = (offset + filsz) - trunc_page(offset + filsz); map_addr = trunc_page((vm_offset_t) vmaddr + filsz); map_len = round_page((vm_offset_t) vmaddr + memsz) - map_addr; } if (map_len != 0) { vm_map_lock(&vmspace->vm_map); rv = vm_map_insert(&vmspace->vm_map, NULL, 0, map_addr, map_addr + map_len, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(&vmspace->vm_map); DPRINTF(("EMP-rv:%d,%x %x\n", rv, map_addr, map_addr + map_len)); if (rv != KERN_SUCCESS) { return EINVAL; } } DPRINTF(("COPYARG %x %x\n", map_addr, copy_len)); if (copy_len != 0) { vm_object_reference(object); rv = vm_map_find(exec_map, object, copy_map_offset, &data_buf, copy_map_len, TRUE, VM_PROT_READ, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); return EINVAL; } /* send the page fragment to user space */ error = copyout((caddr_t) data_buf + copy_start, (caddr_t) map_addr, copy_len); vm_map_remove(exec_map, data_buf, data_buf + copy_map_len); DPRINTF(("%d\n", 
error)); if (error) return (error); } /* * set it to the specified protection */ vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot, FALSE); return error; } static int pecoff_load_file(struct proc * p, const char *file, u_long * addr, u_long * entry, u_long * ldexport) { struct nameidata nd; struct pecoff_dos_filehdr dh; struct coff_filehdr *fp = 0; struct coff_aouthdr *ap; struct pecoff_opthdr *wp; struct coff_scnhdr *sh = 0; struct vmspace *vmspace = p->p_vmspace; struct vattr attr; struct image_params image_params, *imgp; int peofs; int error, i, scnsiz; imgp = &image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->uap = NULL; imgp->attr = &attr; imgp->firstpage = NULL; NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, p); if ((error = namei(&nd)) != 0) { nd.ni_vp = NULL; goto fail; } NDFREE(&nd, NDF_ONLY_PNBUF); imgp->vp = nd.ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) { VOP_UNLOCK(nd.ni_vp, 0, p); goto fail; } VOP_UNLOCK(nd.ni_vp, 0, p); if (error) goto fail; if ((error = pecoff_read_from(p, imgp->vp, 0, (caddr_t) & dh, sizeof(dh))) != 0) goto fail; if ((error = pecoff_signature(p, imgp->vp, &dh) != 0)) goto fail; fp = malloc(PECOFF_HDR_SIZE, M_TEMP, M_WAITOK); peofs = dh.d_peofs + sizeof(signature) - 1; if ((error = pecoff_read_from(p, imgp->vp, peofs, (caddr_t) fp, PECOFF_HDR_SIZE) != 0)) goto fail; if (COFF_BADMAG(fp)) { error = ENOEXEC; goto fail; } ap = (void *) ((char *) fp + sizeof(struct coff_filehdr)); wp = (void *) ((char *) ap + sizeof(struct coff_aouthdr)); /* read section header */ scnsiz = sizeof(struct coff_scnhdr) * fp->f_nscns; sh = malloc(scnsiz, M_TEMP, M_WAITOK); if ((error = pecoff_read_from(p, imgp->vp, peofs + PECOFF_HDR_SIZE, (caddr_t) sh, scnsiz)) != 0) goto fail; /* * Read Section infomation and map sections. 
*/ for (i = 0; i < fp->f_nscns; i++) { int prot = 0; if (sh[i].s_flags & COFF_STYP_DISCARD) continue; /* XXX ? */ if ((sh[i].s_flags & COFF_STYP_TEXT) && (sh[i].s_flags & COFF_STYP_EXEC) == 0) continue; if ((sh[i].s_flags & (COFF_STYP_TEXT | COFF_STYP_DATA | COFF_STYP_BSS)) == 0) continue; prot |= (sh[i].s_flags & COFF_STYP_READ) ? VM_PROT_READ : 0; prot |= (sh[i].s_flags & COFF_STYP_WRITE) ? VM_PROT_WRITE : 0; prot |= (sh[i].s_flags & COFF_STYP_EXEC) ? VM_PROT_EXECUTE : 0; sh[i].s_vaddr += wp->w_base; /* RVA --> VA */ if ((error = pecoff_load_section(p, vmspace, imgp->vp, sh[i].s_scnptr ,(caddr_t) sh[i].s_vaddr, sh[i].s_paddr, sh[i].s_size ,prot)) != 0) goto fail; } *entry = wp->w_base + ap->a_entry; *addr = wp->w_base; *ldexport = wp->w_imghdr[0].i_vaddr + wp->w_base; fail: if (fp) free(fp, M_TEMP); if (sh) free(sh, M_TEMP); if (nd.ni_vp) vrele(nd.ni_vp); return error; } static int exec_pecoff_coff_prep_omagic(struct image_params * imgp, struct coff_filehdr * fp, struct coff_aouthdr * ap, int peofs) { return ENOEXEC; } static int exec_pecoff_coff_prep_nmagic(struct image_params * imgp, struct coff_filehdr * fp, struct coff_aouthdr * ap, int peofs) { return ENOEXEC; } static int exec_pecoff_coff_prep_zmagic(struct image_params * imgp, struct coff_filehdr * fp, struct coff_aouthdr * ap, int peofs) { int scnsiz = sizeof(struct coff_scnhdr) * fp->f_nscns; int error = ENOEXEC, i; int prot; u_long text_size = 0, data_size = 0, dsize; u_long text_addr = 0, data_addr = VM_MAXUSER_ADDRESS; u_long ldexport, ldbase; struct pecoff_opthdr *wp; struct coff_scnhdr *sh; struct vmspace *vmspace; struct pecoff_args *argp = NULL; sh = malloc(scnsiz, M_TEMP, M_WAITOK); wp = (void *) ((char *) ap + sizeof(struct coff_aouthdr)); error = pecoff_read_from(imgp->proc, imgp->vp, peofs + PECOFF_HDR_SIZE, (caddr_t) sh, scnsiz); if ((error = exec_extract_strings(imgp)) != 0) goto fail; exec_new_vmspace(imgp); vmspace = imgp->proc->p_vmspace; for (i = 0; i < fp->f_nscns; i++) { prot = 
VM_PROT_WRITE; /* XXX for relocation? */ prot |= (sh[i].s_flags & COFF_STYP_READ) ? VM_PROT_READ : 0; prot |= (sh[i].s_flags & COFF_STYP_WRITE) ? VM_PROT_WRITE : 0; prot |= (sh[i].s_flags & COFF_STYP_EXEC) ? VM_PROT_EXECUTE : 0; sh[i].s_vaddr += wp->w_base; if (sh[i].s_flags & COFF_STYP_DISCARD) continue; if ((sh[i].s_flags & COFF_STYP_TEXT) != 0) { error = pecoff_load_section(imgp->proc, vmspace, imgp->vp, sh[i].s_scnptr ,(caddr_t) sh[i].s_vaddr, sh[i].s_paddr, sh[i].s_size ,prot); DPRINTF(("ERROR%d\n", error)); if (error) goto fail; text_addr = trunc_page(sh[i].s_vaddr); text_size = trunc_page(sh[i].s_size + sh[i].s_vaddr - text_addr); } if ((sh[i].s_flags & (COFF_STYP_DATA|COFF_STYP_BSS)) != 0) { if (pecoff_load_section(imgp->proc, vmspace, imgp->vp, sh[i].s_scnptr ,(caddr_t) sh[i].s_vaddr, sh[i].s_paddr, sh[i].s_size, prot) != 0) goto fail; data_addr = min(trunc_page(sh[i].s_vaddr), data_addr); dsize = round_page(sh[i].s_vaddr + sh[i].s_paddr) - data_addr; data_size = max(dsize, data_size); } } vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t) (uintptr_t) text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t) (uintptr_t) data_addr; argp = malloc(sizeof(struct pecoff_args), M_TEMP, M_WAITOK); if (argp == NULL) { error = ENOMEM; goto fail; } argp->a_base = wp->w_base; argp->a_entry = wp->w_base + ap->a_entry; argp->a_end = data_addr + data_size; argp->a_subsystem = wp->w_subvers; error = pecoff_load_file(imgp->proc, "/usr/libexec/ld.so.dll", &ldbase, &imgp->entry_addr, &ldexport); if (error) goto fail; argp->a_ldbase = ldbase; argp->a_ldexport = ldexport; memcpy(argp->a_imghdr, wp->w_imghdr, sizeof(struct pecoff_imghdr) * 16); for (i = 0; i < 16; i++) { argp->a_imghdr[i].i_vaddr += wp->w_base; } imgp->proc->p_sysent = &pecoff_sysvec; if (error) goto fail; imgp->auxargs = argp; imgp->auxarg_size = sizeof(struct pecoff_args); imgp->interpreted = 0; imgp->vp->v_flag |= VTEXT; if (sh != NULL) free(sh, M_TEMP); 
return 0; fail: error = (error) ? error : ENOEXEC; if (sh != NULL) free(sh, M_TEMP); if (argp != NULL) free(argp, M_TEMP); return error; } int exec_pecoff_coff_makecmds(struct image_params * imgp, struct coff_filehdr * fp, int peofs) { struct coff_aouthdr *ap; int error; if (COFF_BADMAG(fp)) { return ENOEXEC; } ap = (void *) ((char *) fp + sizeof(struct coff_filehdr)); switch (ap->a_magic) { case COFF_OMAGIC: error = exec_pecoff_coff_prep_omagic(imgp, fp, ap, peofs); break; case COFF_NMAGIC: error = exec_pecoff_coff_prep_nmagic(imgp, fp, ap, peofs); break; case COFF_ZMAGIC: error = exec_pecoff_coff_prep_zmagic(imgp, fp, ap, peofs); break; default: return ENOEXEC; } return error; } static int pecoff_signature(p, vp, dp) struct proc *p; struct vnode *vp; struct pecoff_dos_filehdr *dp; { int error; char buf[512]; char *pesig; if (DOS_BADMAG(dp)) { return ENOEXEC; } error = pecoff_read_from(p, vp, dp->d_peofs, buf, sizeof(buf)); if (error) { return error; } pesig = buf; if (memcmp(pesig, signature, sizeof(signature) - 1) == 0) { return 0; } return EFTYPE; } int pecoff_read_from(p, vp, pos, buf, siz) struct proc *p; struct vnode *vp; int pos; caddr_t buf; int siz; { int error; size_t resid; - struct ucred *uc; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); error = vn_rdwr(UIO_READ, vp, buf, siz, pos, - UIO_SYSSPACE, IO_NODELOCKED, uc, &resid, p); - crfree(uc); + UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, + &resid, p); if (error) return error; if (resid != 0) { return ENOEXEC; } return 0; } static int imgact_pecoff(struct image_params * imgp) { struct pecoff_dos_filehdr *dp = (struct pecoff_dos_filehdr *) imgp->image_header; struct coff_filehdr *fp; int error, peofs; error = pecoff_signature(imgp->proc, imgp->vp, dp); if (error) { return -1; } peofs = dp->d_peofs + sizeof(signature) - 1; fp = malloc(PECOFF_HDR_SIZE, M_TEMP, M_WAITOK); error = pecoff_read_from(imgp->proc, imgp->vp, peofs, (caddr_t) fp, PECOFF_HDR_SIZE); if (error) { free(fp, M_TEMP); 
return error; } error = exec_pecoff_coff_makecmds(imgp, fp, peofs); free(fp, M_TEMP); return error; } static struct execsw pecoff_execsw = {imgact_pecoff, "FreeBSD PEcoff"}; EXEC_SET(pecoff, pecoff_execsw); Index: head/sys/compat/svr4/svr4_fcntl.c =================================================================== --- head/sys/compat/svr4/svr4_fcntl.c (revision 71698) +++ head/sys/compat/svr4/svr4_fcntl.c (revision 71699) @@ -1,739 +1,723 @@ /* * Copyright (c) 1998 Mark Newton * Copyright (c) 1994, 1997 Christos Zoulas. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Christos Zoulas. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include /*#include */ #include #include #include #include #include #include #include #include #include #include static int svr4_to_bsd_flags __P((int)); static u_long svr4_to_bsd_cmd __P((u_long)); static int fd_revoke __P((struct proc *, int)); static int fd_truncate __P((struct proc *, int, struct flock *)); static int bsd_to_svr4_flags __P((int)); static void bsd_to_svr4_flock __P((struct flock *, struct svr4_flock *)); static void svr4_to_bsd_flock __P((struct svr4_flock *, struct flock *)); static void bsd_to_svr4_flock64 __P((struct flock *, struct svr4_flock64 *)); static void svr4_to_bsd_flock64 __P((struct svr4_flock64 *, struct flock *)); static u_long svr4_to_bsd_cmd(cmd) u_long cmd; { switch (cmd) { case SVR4_F_DUPFD: return F_DUPFD; case SVR4_F_GETFD: return F_GETFD; case SVR4_F_SETFD: return F_SETFD; case SVR4_F_GETFL: return F_GETFL; case SVR4_F_SETFL: return F_SETFL; case SVR4_F_GETLK: return F_GETLK; case SVR4_F_SETLK: return F_SETLK; case SVR4_F_SETLKW: return F_SETLKW; default: return -1; } } static int svr4_to_bsd_flags(l) int l; { int r = 0; r |= (l & SVR4_O_RDONLY) ? O_RDONLY : 0; r |= (l & SVR4_O_WRONLY) ? O_WRONLY : 0; r |= (l & SVR4_O_RDWR) ? O_RDWR : 0; r |= (l & SVR4_O_NDELAY) ? O_NONBLOCK : 0; r |= (l & SVR4_O_APPEND) ? O_APPEND : 0; r |= (l & SVR4_O_SYNC) ? O_FSYNC : 0; r |= (l & SVR4_O_NONBLOCK) ? O_NONBLOCK : 0; r |= (l & SVR4_O_PRIV) ? 
O_EXLOCK : 0; r |= (l & SVR4_O_CREAT) ? O_CREAT : 0; r |= (l & SVR4_O_TRUNC) ? O_TRUNC : 0; r |= (l & SVR4_O_EXCL) ? O_EXCL : 0; r |= (l & SVR4_O_NOCTTY) ? O_NOCTTY : 0; return r; } static int bsd_to_svr4_flags(l) int l; { int r = 0; r |= (l & O_RDONLY) ? SVR4_O_RDONLY : 0; r |= (l & O_WRONLY) ? SVR4_O_WRONLY : 0; r |= (l & O_RDWR) ? SVR4_O_RDWR : 0; r |= (l & O_NDELAY) ? SVR4_O_NONBLOCK : 0; r |= (l & O_APPEND) ? SVR4_O_APPEND : 0; r |= (l & O_FSYNC) ? SVR4_O_SYNC : 0; r |= (l & O_NONBLOCK) ? SVR4_O_NONBLOCK : 0; r |= (l & O_EXLOCK) ? SVR4_O_PRIV : 0; r |= (l & O_CREAT) ? SVR4_O_CREAT : 0; r |= (l & O_TRUNC) ? SVR4_O_TRUNC : 0; r |= (l & O_EXCL) ? SVR4_O_EXCL : 0; r |= (l & O_NOCTTY) ? SVR4_O_NOCTTY : 0; return r; } static void bsd_to_svr4_flock(iflp, oflp) struct flock *iflp; struct svr4_flock *oflp; { switch (iflp->l_type) { case F_RDLCK: oflp->l_type = SVR4_F_RDLCK; break; case F_WRLCK: oflp->l_type = SVR4_F_WRLCK; break; case F_UNLCK: oflp->l_type = SVR4_F_UNLCK; break; default: oflp->l_type = -1; break; } oflp->l_whence = (short) iflp->l_whence; oflp->l_start = (svr4_off_t) iflp->l_start; oflp->l_len = (svr4_off_t) iflp->l_len; oflp->l_sysid = 0; oflp->l_pid = (svr4_pid_t) iflp->l_pid; } static void svr4_to_bsd_flock(iflp, oflp) struct svr4_flock *iflp; struct flock *oflp; { switch (iflp->l_type) { case SVR4_F_RDLCK: oflp->l_type = F_RDLCK; break; case SVR4_F_WRLCK: oflp->l_type = F_WRLCK; break; case SVR4_F_UNLCK: oflp->l_type = F_UNLCK; break; default: oflp->l_type = -1; break; } oflp->l_whence = iflp->l_whence; oflp->l_start = (off_t) iflp->l_start; oflp->l_len = (off_t) iflp->l_len; oflp->l_pid = (pid_t) iflp->l_pid; } static void bsd_to_svr4_flock64(iflp, oflp) struct flock *iflp; struct svr4_flock64 *oflp; { switch (iflp->l_type) { case F_RDLCK: oflp->l_type = SVR4_F_RDLCK; break; case F_WRLCK: oflp->l_type = SVR4_F_WRLCK; break; case F_UNLCK: oflp->l_type = SVR4_F_UNLCK; break; default: oflp->l_type = -1; break; } oflp->l_whence = (short) 
iflp->l_whence; oflp->l_start = (svr4_off64_t) iflp->l_start; oflp->l_len = (svr4_off64_t) iflp->l_len; oflp->l_sysid = 0; oflp->l_pid = (svr4_pid_t) iflp->l_pid; } static void svr4_to_bsd_flock64(iflp, oflp) struct svr4_flock64 *iflp; struct flock *oflp; { switch (iflp->l_type) { case SVR4_F_RDLCK: oflp->l_type = F_RDLCK; break; case SVR4_F_WRLCK: oflp->l_type = F_WRLCK; break; case SVR4_F_UNLCK: oflp->l_type = F_UNLCK; break; default: oflp->l_type = -1; break; } oflp->l_whence = iflp->l_whence; oflp->l_start = (off_t) iflp->l_start; oflp->l_len = (off_t) iflp->l_len; oflp->l_pid = (pid_t) iflp->l_pid; } static int fd_revoke(p, fd) struct proc *p; int fd; { struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct mount *mp; struct vattr vattr; - struct ucred *uc; int error, *retval; retval = p->p_retval; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return EBADF; if (fp->f_type != DTYPE_VNODE) return EINVAL; vp = (struct vnode *) fp->f_data; if (vp->v_type != VCHR && vp->v_type != VBLK) { error = EINVAL; goto out; } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = VOP_GETATTR(vp, &vattr, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0) goto out; - } - if (uc->cr_uid != vattr.va_uid && (error = suser(p)) != 0) { - crfree(uc); + if (p->p_ucred->cr_uid != vattr.va_uid && + (error = suser(p)) != 0) goto out; - } - crfree(uc); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto out; if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); vn_finished_write(mp); out: vrele(vp); return error; } static int fd_truncate(p, fd, flp) struct proc *p; int fd; struct flock *flp; { struct filedesc *fdp = p->p_fd; struct file *fp; off_t start, length; struct vnode *vp; struct vattr vattr; - struct ucred *uc; int error, *retval; struct ftruncate_args ft; retval = p->p_retval; /* * We only support truncating the file. 
*/ if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return EBADF; vp = (struct vnode *)fp->f_data; if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) return ESPIPE; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_GETATTR(vp, &vattr, uc, p); - crfree(uc); - if (error != 0) + if ((error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) != 0) return error; length = vattr.va_size; switch (flp->l_whence) { case SEEK_CUR: start = fp->f_offset + flp->l_start; break; case SEEK_END: start = flp->l_start + length; break; case SEEK_SET: start = flp->l_start; break; default: return EINVAL; } if (start + flp->l_len < length) { /* We don't support free'ing in the middle of the file */ return EINVAL; } SCARG(&ft, fd) = fd; SCARG(&ft, length) = start; return ftruncate(p, &ft); } int svr4_sys_open(p, uap) register struct proc *p; struct svr4_sys_open_args *uap; { int error, retval; struct open_args cup; caddr_t sg = stackgap_init(); CHECKALTEXIST(p, &sg, SCARG(uap, path)); (&cup)->path = uap->path; (&cup)->flags = svr4_to_bsd_flags(uap->flags); (&cup)->mode = uap->mode; error = open(p, &cup); if (error) { /* uprintf("svr4_open(%s, 0x%0x, 0%o): %d\n", uap->path, uap->flags, uap->mode, error);*/ return error; } retval = p->p_retval[0]; PROC_LOCK(p); if (!(SCARG(&cup, flags) & O_NOCTTY) && SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { #if defined(NOTYET) struct filedesc *fdp = p->p_fd; struct file *fp = fdp->fd_ofiles[retval]; PROC_UNLOCK(p); /* ignore any error, just give it a try */ if (fp->f_type == DTYPE_VNODE) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, p); } else PROC_UNLOCK(p); #else } PROC_UNLOCK(p); #endif return error; } int svr4_sys_open64(p, uap) register struct proc *p; struct svr4_sys_open64_args *uap; { return svr4_sys_open(p, (struct svr4_sys_open_args *)uap); } int svr4_sys_creat(p, uap) register struct proc *p; struct svr4_sys_creat_args *uap; { struct open_args cup; caddr_t sg = stackgap_init(); CHECKALTEXIST(p, &sg, 
SCARG(uap, path)); SCARG(&cup, path) = SCARG(uap, path); SCARG(&cup, mode) = SCARG(uap, mode); SCARG(&cup, flags) = O_WRONLY | O_CREAT | O_TRUNC; return open(p, &cup); } int svr4_sys_creat64(p, uap) register struct proc *p; struct svr4_sys_creat64_args *uap; { return svr4_sys_creat(p, (struct svr4_sys_creat_args *)uap); } int svr4_sys_llseek(p, uap) register struct proc *p; struct svr4_sys_llseek_args *uap; { struct lseek_args ap; SCARG(&ap, fd) = SCARG(uap, fd); #if BYTE_ORDER == BIG_ENDIAN SCARG(&ap, offset) = (((u_int64_t) SCARG(uap, offset1)) << 32) | SCARG(uap, offset2); #else SCARG(&ap, offset) = (((u_int64_t) SCARG(uap, offset2)) << 32) | SCARG(uap, offset1); #endif SCARG(&ap, whence) = SCARG(uap, whence); return lseek(p, &ap); } int svr4_sys_access(p, uap) register struct proc *p; struct svr4_sys_access_args *uap; { struct access_args cup; int *retval; caddr_t sg = stackgap_init(); CHECKALTEXIST(p, &sg, SCARG(uap, path)); retval = p->p_retval; SCARG(&cup, path) = SCARG(uap, path); SCARG(&cup, flags) = SCARG(uap, flags); return access(p, &cup); } #if defined(NOTYET) int svr4_sys_pread(p, uap) register struct proc *p; struct svr4_sys_pread_args *uap; { struct pread_args pra; /* * Just translate the args structure and call the NetBSD * pread(2) system call (offset type is 64-bit in NetBSD). */ SCARG(&pra, fd) = SCARG(uap, fd); SCARG(&pra, buf) = SCARG(uap, buf); SCARG(&pra, nbyte) = SCARG(uap, nbyte); SCARG(&pra, offset) = SCARG(uap, off); return pread(p, &pra); } #endif #if defined(NOTYET) int svr4_sys_pread64(p, v, retval) register struct proc *p; void *v; register_t *retval; { struct svr4_sys_pread64_args *uap = v; struct sys_pread_args pra; /* * Just translate the args structure and call the NetBSD * pread(2) system call (offset type is 64-bit in NetBSD). 
*/ SCARG(&pra, fd) = SCARG(uap, fd); SCARG(&pra, buf) = SCARG(uap, buf); SCARG(&pra, nbyte) = SCARG(uap, nbyte); SCARG(&pra, offset) = SCARG(uap, off); return (sys_pread(p, &pra, retval)); } #endif /* NOTYET */ #if defined(NOTYET) int svr4_sys_pwrite(p, uap) register struct proc *p; struct svr4_sys_pwrite_args *uap; { struct pwrite_args pwa; /* * Just translate the args structure and call the NetBSD * pwrite(2) system call (offset type is 64-bit in NetBSD). */ SCARG(&pwa, fd) = SCARG(uap, fd); SCARG(&pwa, buf) = SCARG(uap, buf); SCARG(&pwa, nbyte) = SCARG(uap, nbyte); SCARG(&pwa, offset) = SCARG(uap, off); return pwrite(p, &pwa); } #endif #if defined(NOTYET) int svr4_sys_pwrite64(p, v, retval) register struct proc *p; void *v; register_t *retval; { struct svr4_sys_pwrite64_args *uap = v; struct sys_pwrite_args pwa; /* * Just translate the args structure and call the NetBSD * pwrite(2) system call (offset type is 64-bit in NetBSD). */ SCARG(&pwa, fd) = SCARG(uap, fd); SCARG(&pwa, buf) = SCARG(uap, buf); SCARG(&pwa, nbyte) = SCARG(uap, nbyte); SCARG(&pwa, offset) = SCARG(uap, off); return (sys_pwrite(p, &pwa, retval)); } #endif /* NOTYET */ int svr4_sys_fcntl(p, uap) register struct proc *p; struct svr4_sys_fcntl_args *uap; { int error; struct fcntl_args fa; int *retval; retval = p->p_retval; SCARG(&fa, fd) = SCARG(uap, fd); SCARG(&fa, cmd) = svr4_to_bsd_cmd(SCARG(uap, cmd)); switch (SCARG(&fa, cmd)) { case F_DUPFD: case F_GETFD: case F_SETFD: SCARG(&fa, arg) = (long) SCARG(uap, arg); return fcntl(p, &fa); case F_GETFL: SCARG(&fa, arg) = (long) SCARG(uap, arg); error = fcntl(p, &fa); if (error) return error; *retval = bsd_to_svr4_flags(*retval); return error; case F_SETFL: { /* * we must save the O_ASYNC flag, as that is * handled by ioctl(_, I_SETSIG, _) emulation. 
*/ long cmd; int flags; DPRINTF(("Setting flags 0x%x\n", SCARG(uap, arg))); cmd = SCARG(&fa, cmd); /* save it for a while */ SCARG(&fa, cmd) = F_GETFL; if ((error = fcntl(p, &fa)) != 0) return error; flags = *retval; flags &= O_ASYNC; flags |= svr4_to_bsd_flags((u_long) SCARG(uap, arg)); SCARG(&fa, cmd) = cmd; SCARG(&fa, arg) = (long) flags; return fcntl(p, &fa); } case F_GETLK: case F_SETLK: case F_SETLKW: { struct svr4_flock ifl; struct flock *flp, fl; caddr_t sg = stackgap_init(); flp = stackgap_alloc(&sg, sizeof(struct flock)); SCARG(&fa, arg) = (long) flp; error = copyin(SCARG(uap, arg), &ifl, sizeof ifl); if (error) return error; svr4_to_bsd_flock(&ifl, &fl); error = copyout(&fl, flp, sizeof fl); if (error) return error; error = fcntl(p, &fa); if (error || SCARG(&fa, cmd) != F_GETLK) return error; error = copyin(flp, &fl, sizeof fl); if (error) return error; bsd_to_svr4_flock(&fl, &ifl); return copyout(&ifl, SCARG(uap, arg), sizeof ifl); } case -1: switch (SCARG(uap, cmd)) { case SVR4_F_DUP2FD: { struct dup2_args du; SCARG(&du, from) = SCARG(uap, fd); SCARG(&du, to) = (int)SCARG(uap, arg); error = dup2(p, &du); if (error) return error; *retval = SCARG(&du, to); return 0; } case SVR4_F_FREESP: { struct svr4_flock ifl; struct flock fl; error = copyin(SCARG(uap, arg), &ifl, sizeof ifl); if (error) return error; svr4_to_bsd_flock(&ifl, &fl); return fd_truncate(p, SCARG(uap, fd), &fl); } case SVR4_F_GETLK64: case SVR4_F_SETLK64: case SVR4_F_SETLKW64: { struct svr4_flock64 ifl; struct flock *flp, fl; caddr_t sg = stackgap_init(); flp = stackgap_alloc(&sg, sizeof(struct flock)); SCARG(&fa, arg) = (long) flp; error = copyin(SCARG(uap, arg), &ifl, sizeof ifl); if (error) return error; svr4_to_bsd_flock64(&ifl, &fl); error = copyout(&fl, flp, sizeof fl); if (error) return error; error = fcntl(p, &fa); if (error || SCARG(&fa, cmd) != F_GETLK) return error; error = copyin(flp, &fl, sizeof fl); if (error) return error; bsd_to_svr4_flock64(&fl, &ifl); return copyout(&ifl, 
SCARG(uap, arg), sizeof ifl); } case SVR4_F_FREESP64: { struct svr4_flock64 ifl; struct flock fl; error = copyin(SCARG(uap, arg), &ifl, sizeof ifl); if (error) return error; svr4_to_bsd_flock64(&ifl, &fl); return fd_truncate(p, SCARG(uap, fd), &fl); } case SVR4_F_REVOKE: return fd_revoke(p, SCARG(uap, fd)); default: return ENOSYS; } default: return ENOSYS; } } Index: head/sys/compat/svr4/svr4_misc.c =================================================================== --- head/sys/compat/svr4/svr4_misc.c (revision 71698) +++ head/sys/compat/svr4/svr4_misc.c (revision 71699) @@ -1,1739 +1,1725 @@ /* * Copyright (c) 1998 Mark Newton * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * SVR4 compatibility module. * * SVR4 system calls that are implemented differently in BSD are * handled here. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #endif #if defined(NetBSD) # if defined(UVM) # include # endif #endif #define BSD_DIRENT(cp) ((struct dirent *)(cp)) static int svr4_mknod __P((struct proc *, register_t *, char *, svr4_mode_t, svr4_dev_t)); static __inline clock_t timeval_to_clock_t __P((struct timeval *)); static int svr4_setinfo __P((struct proc *, int, svr4_siginfo_t *)); struct svr4_hrtcntl_args; static int svr4_hrtcntl __P((struct proc *, struct svr4_hrtcntl_args *, register_t *)); static void bsd_statfs_to_svr4_statvfs __P((const struct statfs *, struct svr4_statvfs *)); static void bsd_statfs_to_svr4_statvfs64 __P((const struct statfs *, struct svr4_statvfs64 *)); static struct proc *svr4_pfind __P((pid_t pid)); /* BOGUS noop */ #if defined(BOGUS) int svr4_sys_setitimer(p, uap) register struct proc *p; struct svr4_sys_setitimer_args *uap; { p->p_retval[0] = 0; return 0; } #endif int svr4_sys_wait(p, uap) struct proc *p; struct 
svr4_sys_wait_args *uap; { struct wait_args w4; int error, *retval = p->p_retval, st, sig; size_t sz = sizeof(*SCARG(&w4, status)); SCARG(&w4, rusage) = NULL; SCARG(&w4, options) = 0; if (SCARG(uap, status) == NULL) { caddr_t sg = stackgap_init(); SCARG(&w4, status) = stackgap_alloc(&sg, sz); } else SCARG(&w4, status) = SCARG(uap, status); SCARG(&w4, pid) = WAIT_ANY; if ((error = wait4(p, &w4)) != 0) return error; if ((error = copyin(SCARG(&w4, status), &st, sizeof(st))) != 0) return error; if (WIFSIGNALED(st)) { sig = WTERMSIG(st); if (sig >= 0 && sig < NSIG) st = (st & ~0177) | SVR4_BSD2SVR4_SIG(sig); } else if (WIFSTOPPED(st)) { sig = WSTOPSIG(st); if (sig >= 0 && sig < NSIG) st = (st & ~0xff00) | (SVR4_BSD2SVR4_SIG(sig) << 8); } /* * It looks like wait(2) on svr4/solaris/2.4 returns * the status in retval[1], and the pid on retval[0]. */ retval[1] = st; if (SCARG(uap, status)) if ((error = copyout(&st, SCARG(uap, status), sizeof(st))) != 0) return error; return 0; } int svr4_sys_execv(p, uap) struct proc *p; struct svr4_sys_execv_args *uap; { struct execve_args ap; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, SCARG(uap, path)); SCARG(&ap, fname) = SCARG(uap, path); SCARG(&ap, argv) = SCARG(uap, argp); SCARG(&ap, envv) = NULL; return execve(p, &ap); } int svr4_sys_execve(p, uap) struct proc *p; struct svr4_sys_execve_args *uap; { struct execve_args ap; caddr_t sg; sg = stackgap_init(); CHECKALTEXIST(p, &sg, uap->path); SCARG(&ap, fname) = SCARG(uap, path); SCARG(&ap, argv) = SCARG(uap, argp); SCARG(&ap, envv) = SCARG(uap, envp); return execve(p, &ap); } int svr4_sys_time(p, v) struct proc *p; struct svr4_sys_time_args *v; { struct svr4_sys_time_args *uap = v; int error = 0; struct timeval tv; microtime(&tv); if (SCARG(uap, t)) error = copyout(&tv.tv_sec, SCARG(uap, t), sizeof(*(SCARG(uap, t)))); p->p_retval[0] = (int) tv.tv_sec; return error; } /* * Read SVR4-style directory entries. 
We suck them into kernel space so * that they can be massaged before being copied out to user code. * * This code is ported from the Linux emulator: Changes to the VFS interface * between FreeBSD and NetBSD have made it simpler to port it from there than * to adapt the NetBSD version. */ int svr4_sys_getdents64(p, uap) struct proc *p; struct svr4_sys_getdents64_args *uap; { register struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* SVR4-format */ int resid, svr4reclen=0; /* SVR4-format */ struct file *fp; struct uio auio; struct iovec aiov; struct vattr va; - struct ucred *uc; off_t off; struct svr4_dirent64 svr4_dirent; int buflen, error, eofflag, nbytes, justone; u_long *cookies = NULL, *cookiep; int ncookies; DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n", p->p_pid, SCARG(uap, fd), SCARG(uap, nbytes))); if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) { return (error); } if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *) fp->f_data; if (vp->v_type != VDIR) return (EINVAL); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_GETATTR(vp, &va, uc, p); - crfree(uc); - if (error != 0) { + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) { return error; } nbytes = SCARG(uap, nbytes); if (nbytes == 1) { nbytes = sizeof (struct svr4_dirent64); justone = 1; } else justone = 0; off = fp->f_offset; #define DIRBLKSIZ 512 /* XXX we used to use ufs's DIRBLKSIZ */ buflen = max(DIRBLKSIZ, nbytes); buflen = min(buflen, MAXBSIZE); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = p; auio.uio_resid = buflen; auio.uio_offset = off; if (cookies) { free(cookies, M_TEMP); cookies = NULL; } error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookies); 
if (error) { goto out; } inp = buf; outp = (caddr_t) SCARG(uap, dp); resid = nbytes; if ((len = buflen - auio.uio_resid) <= 0) { goto eof; } cookiep = cookies; if (cookies) { /* * When using cookies, the vfs has the option of reading from * a different offset than that supplied (UFS truncates the * offset to a block boundary to make sure that it never reads * partway through a directory entry, even if the directory * has been compacted). */ while (len > 0 && ncookies > 0 && *cookiep <= off) { bdp = (struct dirent *) inp; len -= bdp->d_reclen; inp += bdp->d_reclen; cookiep++; ncookies--; } } while (len > 0) { if (cookiep && ncookies == 0) break; bdp = (struct dirent *) inp; reclen = bdp->d_reclen; if (reclen & 3) { DPRINTF(("svr4_readdir: reclen=%d\n", reclen)); error = EFAULT; goto out; } if (bdp->d_fileno == 0) { inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; len -= reclen; continue; } svr4reclen = SVR4_RECLEN(&svr4_dirent, bdp->d_namlen); if (reclen > len || resid < svr4reclen) { outp++; break; } svr4_dirent.d_ino = (long) bdp->d_fileno; if (justone) { /* * old svr4-style readdir usage. 
*/ svr4_dirent.d_off = (svr4_off_t) svr4reclen; svr4_dirent.d_reclen = (u_short) bdp->d_namlen; } else { svr4_dirent.d_off = (svr4_off_t)(off + reclen); svr4_dirent.d_reclen = (u_short) svr4reclen; } strcpy(svr4_dirent.d_name, bdp->d_name); if ((error = copyout((caddr_t)&svr4_dirent, outp, svr4reclen))) goto out; inp += reclen; if (cookiep) { off = *cookiep++; ncookies--; } else off += reclen; outp += svr4reclen; resid -= svr4reclen; len -= reclen; if (justone) break; } if (outp == (caddr_t) SCARG(uap, dp)) goto again; fp->f_offset = off; if (justone) nbytes = resid + svr4reclen; eof: p->p_retval[0] = nbytes - resid; out: if (cookies) free(cookies, M_TEMP); VOP_UNLOCK(vp, 0, p); free(buf, M_TEMP); return error; } int svr4_sys_getdents(p, uap) struct proc *p; struct svr4_sys_getdents_args *uap; { struct dirent *bdp; struct vnode *vp; caddr_t inp, buf; /* BSD-format */ int len, reclen; /* BSD-format */ caddr_t outp; /* SVR4-format */ int resid, svr4_reclen; /* SVR4-format */ struct file *fp; struct uio auio; struct iovec aiov; struct svr4_dirent idb; off_t off; /* true file offset */ int buflen, error, eofflag; u_long *cookiebuf = NULL, *cookie; int ncookies = 0, *retval = p->p_retval; if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; if (vp->v_type != VDIR) return (EINVAL); buflen = min(MAXBSIZE, SCARG(uap, nbytes)); buf = malloc(buflen, M_TEMP, M_WAITOK); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); off = fp->f_offset; again: aiov.iov_base = buf; aiov.iov_len = buflen; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = p; auio.uio_resid = buflen; auio.uio_offset = off; /* * First we read into the malloc'ed buffer, then * we massage it into user space, one record at a time. 
*/ error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, &ncookies, &cookiebuf); if (error) goto out; inp = buf; outp = SCARG(uap, buf); resid = SCARG(uap, nbytes); if ((len = buflen - auio.uio_resid) == 0) goto eof; for (cookie = cookiebuf; len > 0; len -= reclen) { bdp = (struct dirent *)inp; reclen = bdp->d_reclen; if (reclen & 3) panic("svr4_sys_getdents64: bad reclen"); off = *cookie++; /* each entry points to the next */ if ((off >> 32) != 0) { uprintf("svr4_sys_getdents64: dir offset too large for emulated program"); error = EINVAL; goto out; } if (bdp->d_fileno == 0) { inp += reclen; /* it is a hole; squish it out */ continue; } svr4_reclen = SVR4_RECLEN(&idb, bdp->d_namlen); if (reclen > len || resid < svr4_reclen) { /* entry too big for buffer, so just stop */ outp++; break; } /* * Massage in place to make a SVR4-shaped dirent (otherwise * we have to worry about touching user memory outside of * the copyout() call). */ idb.d_ino = (svr4_ino_t)bdp->d_fileno; idb.d_off = (svr4_off_t)off; idb.d_reclen = (u_short)svr4_reclen; strcpy(idb.d_name, bdp->d_name); if ((error = copyout((caddr_t)&idb, outp, svr4_reclen))) goto out; /* advance past this real entry */ inp += reclen; /* advance output past SVR4-shaped entry */ outp += svr4_reclen; resid -= svr4_reclen; } /* if we squished out the whole block, try again */ if (outp == SCARG(uap, buf)) goto again; fp->f_offset = off; /* update the vnode offset */ eof: *retval = SCARG(uap, nbytes) - resid; out: VOP_UNLOCK(vp, 0, p); if (cookiebuf) free(cookiebuf, M_TEMP); free(buf, M_TEMP); return error; } int svr4_sys_mmap(p, uap) struct proc *p; struct svr4_sys_mmap_args *uap; { struct mmap_args mm; int *retval; retval = p->p_retval; #define _MAP_NEW 0x80000000 /* * Verify the arguments. */ if (SCARG(uap, prot) & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return EINVAL; /* XXX still needed? 
*/ if (SCARG(uap, len) == 0) return EINVAL; SCARG(&mm, prot) = SCARG(uap, prot); SCARG(&mm, len) = SCARG(uap, len); SCARG(&mm, flags) = SCARG(uap, flags) & ~_MAP_NEW; SCARG(&mm, fd) = SCARG(uap, fd); SCARG(&mm, addr) = SCARG(uap, addr); SCARG(&mm, pos) = SCARG(uap, pos); return mmap(p, &mm); } int svr4_sys_mmap64(p, uap) struct proc *p; struct svr4_sys_mmap64_args *uap; { struct mmap_args mm; void *rp; #define _MAP_NEW 0x80000000 /* * Verify the arguments. */ if (SCARG(uap, prot) & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) return EINVAL; /* XXX still needed? */ if (SCARG(uap, len) == 0) return EINVAL; SCARG(&mm, prot) = SCARG(uap, prot); SCARG(&mm, len) = SCARG(uap, len); SCARG(&mm, flags) = SCARG(uap, flags) & ~_MAP_NEW; SCARG(&mm, fd) = SCARG(uap, fd); SCARG(&mm, addr) = SCARG(uap, addr); SCARG(&mm, pos) = SCARG(uap, pos); rp = (void *) round_page((vm_offset_t)(p->p_vmspace->vm_daddr + MAXDSIZ)); if ((SCARG(&mm, flags) & MAP_FIXED) == 0 && SCARG(&mm, addr) != 0 && (void *)SCARG(&mm, addr) < rp) SCARG(&mm, addr) = rp; return mmap(p, &mm); } int svr4_sys_fchroot(p, uap) struct proc *p; struct svr4_sys_fchroot_args *uap; { struct filedesc *fdp = p->p_fd; struct vnode *vp; struct file *fp; - struct ucred *uc; int error; if ((error = suser(p)) != 0) return error; if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) return error; vp = (struct vnode *) fp->f_data; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; - else { - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); + else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - crfree(uc); - } VOP_UNLOCK(vp, 0, p); if (error) return error; VREF(vp); if (fdp->fd_rdir != NULL) vrele(fdp->fd_rdir); fdp->fd_rdir = vp; return 0; } static int svr4_mknod(p, retval, path, mode, dev) struct proc *p; register_t *retval; char *path; svr4_mode_t mode; svr4_dev_t dev; { caddr_t sg = stackgap_init(); CHECKALTEXIST(p, &sg, path); if (S_ISFIFO(mode)) { struct mkfifo_args ap; SCARG(&ap, path) 
= path; SCARG(&ap, mode) = mode; return mkfifo(p, &ap); } else { struct mknod_args ap; SCARG(&ap, path) = path; SCARG(&ap, mode) = mode; SCARG(&ap, dev) = dev; return mknod(p, &ap); } } int svr4_sys_mknod(p, uap) register struct proc *p; struct svr4_sys_mknod_args *uap; { int *retval = p->p_retval; return svr4_mknod(p, retval, SCARG(uap, path), SCARG(uap, mode), (svr4_dev_t)svr4_to_bsd_odev_t(SCARG(uap, dev))); } int svr4_sys_xmknod(p, uap) struct proc *p; struct svr4_sys_xmknod_args *uap; { int *retval = p->p_retval; return svr4_mknod(p, retval, SCARG(uap, path), SCARG(uap, mode), (svr4_dev_t)svr4_to_bsd_dev_t(SCARG(uap, dev))); } int svr4_sys_vhangup(p, uap) struct proc *p; struct svr4_sys_vhangup_args *uap; { return 0; } int svr4_sys_sysconfig(p, uap) struct proc *p; struct svr4_sys_sysconfig_args *uap; { int *retval; retval = &(p->p_retval[0]); switch (SCARG(uap, name)) { case SVR4_CONFIG_UNUSED: *retval = 0; break; case SVR4_CONFIG_NGROUPS: *retval = NGROUPS_MAX; break; case SVR4_CONFIG_CHILD_MAX: *retval = maxproc; break; case SVR4_CONFIG_OPEN_FILES: *retval = maxfiles; break; case SVR4_CONFIG_POSIX_VER: *retval = 198808; break; case SVR4_CONFIG_PAGESIZE: *retval = PAGE_SIZE; break; case SVR4_CONFIG_CLK_TCK: *retval = 60; /* should this be `hz', ie. 100? */ break; case SVR4_CONFIG_XOPEN_VER: *retval = 2; /* XXX: What should that be? */ break; case SVR4_CONFIG_PROF_TCK: *retval = 60; /* XXX: What should that be? 
*/ break; case SVR4_CONFIG_NPROC_CONF: *retval = 1; /* Only one processor for now */ break; case SVR4_CONFIG_NPROC_ONLN: *retval = 1; /* And it better be online */ break; case SVR4_CONFIG_AIO_LISTIO_MAX: case SVR4_CONFIG_AIO_MAX: case SVR4_CONFIG_AIO_PRIO_DELTA_MAX: *retval = 0; /* No aio support */ break; case SVR4_CONFIG_DELAYTIMER_MAX: *retval = 0; /* No delaytimer support */ break; case SVR4_CONFIG_MQ_OPEN_MAX: *retval = msginfo.msgmni; break; case SVR4_CONFIG_MQ_PRIO_MAX: *retval = 0; /* XXX: Don't know */ break; case SVR4_CONFIG_RTSIG_MAX: *retval = 0; break; case SVR4_CONFIG_SEM_NSEMS_MAX: *retval = seminfo.semmni; break; case SVR4_CONFIG_SEM_VALUE_MAX: *retval = seminfo.semvmx; break; case SVR4_CONFIG_SIGQUEUE_MAX: *retval = 0; /* XXX: Don't know */ break; case SVR4_CONFIG_SIGRT_MIN: case SVR4_CONFIG_SIGRT_MAX: *retval = 0; /* No real time signals */ break; case SVR4_CONFIG_TIMER_MAX: *retval = 3; /* XXX: real, virtual, profiling */ break; #if defined(NOTYET) case SVR4_CONFIG_PHYS_PAGES: #if defined(UVM) *retval = uvmexp.free; /* XXX: free instead of total */ #else *retval = cnt.v_free_count; /* XXX: free instead of total */ #endif break; case SVR4_CONFIG_AVPHYS_PAGES: #if defined(UVM) *retval = uvmexp.active; /* XXX: active instead of avg */ #else *retval = cnt.v_active_count; /* XXX: active instead of avg */ #endif break; #endif /* NOTYET */ default: return EINVAL; } return 0; } extern int swap_pager_full; /* ARGSUSED */ int svr4_sys_break(p, uap) struct proc *p; struct svr4_sys_break_args *uap; { struct vmspace *vm = p->p_vmspace; vm_offset_t new, old, base, ns; int rv; base = round_page((vm_offset_t) vm->vm_daddr); ns = (vm_offset_t)SCARG(uap, nsize); new = round_page(ns); /* For p_rlimit. */ mtx_assert(&Giant, MA_OWNED); if (new > base) { if ((new - base) > (unsigned) p->p_rlimit[RLIMIT_DATA].rlim_cur) { return ENOMEM; } if (new >= VM_MAXUSER_ADDRESS) { return (ENOMEM); } } else if (new < base) { /* * This is simply an invalid value. 
If someone wants to * do fancy address space manipulations, mmap and munmap * can do most of what the user would want. */ return EINVAL; } old = base + ctob(vm->vm_dsize); if (new > old) { vm_size_t diff; if (swap_pager_full) { return (ENOMEM); } diff = new - old; rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) { return (ENOMEM); } vm->vm_dsize += btoc(diff); } else if (new < old) { rv = vm_map_remove(&vm->vm_map, new, old); if (rv != KERN_SUCCESS) { return (ENOMEM); } vm->vm_dsize -= btoc(old - new); } return (0); } static __inline clock_t timeval_to_clock_t(tv) struct timeval *tv; { return tv->tv_sec * hz + tv->tv_usec / (1000000 / hz); } int svr4_sys_times(p, uap) struct proc *p; struct svr4_sys_times_args *uap; { int error, *retval = p->p_retval; struct tms tms; struct timeval t; struct rusage *ru; struct rusage r; struct getrusage_args ga; caddr_t sg = stackgap_init(); ru = stackgap_alloc(&sg, sizeof(struct rusage)); SCARG(&ga, who) = RUSAGE_SELF; SCARG(&ga, rusage) = ru; error = getrusage(p, &ga); if (error) return error; if ((error = copyin(ru, &r, sizeof r)) != 0) return error; tms.tms_utime = timeval_to_clock_t(&r.ru_utime); tms.tms_stime = timeval_to_clock_t(&r.ru_stime); SCARG(&ga, who) = RUSAGE_CHILDREN; error = getrusage(p, &ga); if (error) return error; if ((error = copyin(ru, &r, sizeof r)) != 0) return error; tms.tms_cutime = timeval_to_clock_t(&r.ru_utime); tms.tms_cstime = timeval_to_clock_t(&r.ru_stime); microtime(&t); *retval = timeval_to_clock_t(&t); return copyout(&tms, SCARG(uap, tp), sizeof(tms)); } int svr4_sys_ulimit(p, uap) struct proc *p; struct svr4_sys_ulimit_args *uap; { int *retval = p->p_retval; switch (SCARG(uap, cmd)) { case SVR4_GFILLIM: /* For p_rlimit below. 
*/ mtx_assert(&Giant, MA_OWNED); *retval = p->p_rlimit[RLIMIT_FSIZE].rlim_cur / 512; if (*retval == -1) *retval = 0x7fffffff; return 0; case SVR4_SFILLIM: { int error; struct __setrlimit_args srl; struct rlimit krl; caddr_t sg = stackgap_init(); struct rlimit *url = (struct rlimit *) stackgap_alloc(&sg, sizeof *url); krl.rlim_cur = SCARG(uap, newlimit) * 512; mtx_assert(&Giant, MA_OWNED); krl.rlim_max = p->p_rlimit[RLIMIT_FSIZE].rlim_max; error = copyout(&krl, url, sizeof(*url)); if (error) return error; SCARG(&srl, which) = RLIMIT_FSIZE; SCARG(&srl, rlp) = (struct orlimit *)url; error = setrlimit(p, &srl); if (error) return error; mtx_assert(&Giant, MA_OWNED); *retval = p->p_rlimit[RLIMIT_FSIZE].rlim_cur; if (*retval == -1) *retval = 0x7fffffff; return 0; } case SVR4_GMEMLIM: { struct vmspace *vm = p->p_vmspace; register_t r; mtx_assert(&Giant, MA_OWNED); r = p->p_rlimit[RLIMIT_DATA].rlim_cur; if (r == -1) r = 0x7fffffff; r += (long) vm->vm_daddr; if (r < 0) r = 0x7fffffff; *retval = r; return 0; } case SVR4_GDESLIM: mtx_assert(&Giant, MA_OWNED); *retval = p->p_rlimit[RLIMIT_NOFILE].rlim_cur; if (*retval == -1) *retval = 0x7fffffff; return 0; default: return EINVAL; } } static struct proc * svr4_pfind(pid) pid_t pid; { struct proc *p; /* look in the live processes */ if ((p = pfind(pid)) == NULL) /* look in the zombies */ p = zpfind(pid); return p; } int svr4_sys_pgrpsys(p, uap) struct proc *p; struct svr4_sys_pgrpsys_args *uap; { int *retval = p->p_retval; switch (SCARG(uap, cmd)) { case 1: /* setpgrp() */ /* * SVR4 setpgrp() (which takes no arguments) has the * semantics that the session ID is also created anew, so * in almost every sense, setpgrp() is identical to * setsid() for SVR4. (Under BSD, the difference is that * a setpgid(0,0) will not create a new session.) 
*/ setsid(p, NULL); /*FALLTHROUGH*/ case 0: /* getpgrp() */ *retval = p->p_pgrp->pg_id; return 0; case 2: /* getsid(pid) */ if (SCARG(uap, pid) != 0 && (p = svr4_pfind(SCARG(uap, pid))) == NULL) return ESRCH; /* * This has already been initialized to the pid of * the session leader. */ *retval = (register_t) p->p_session->s_leader->p_pid; return 0; case 3: /* setsid() */ return setsid(p, NULL); case 4: /* getpgid(pid) */ if (SCARG(uap, pid) != 0 && (p = svr4_pfind(SCARG(uap, pid))) == NULL) return ESRCH; *retval = (int) p->p_pgrp->pg_id; return 0; case 5: /* setpgid(pid, pgid); */ { struct setpgid_args sa; SCARG(&sa, pid) = SCARG(uap, pid); SCARG(&sa, pgid) = SCARG(uap, pgid); return setpgid(p, &sa); } default: return EINVAL; } } #define syscallarg(x) union { x datum; register_t pad; } struct svr4_hrtcntl_args { int cmd; int fun; int clk; svr4_hrt_interval_t * iv; svr4_hrt_time_t * ti; }; static int svr4_hrtcntl(p, uap, retval) struct proc *p; struct svr4_hrtcntl_args *uap; register_t *retval; { switch (SCARG(uap, fun)) { case SVR4_HRT_CNTL_RES: DPRINTF(("htrcntl(RES)\n")); *retval = SVR4_HRT_USEC; return 0; case SVR4_HRT_CNTL_TOFD: DPRINTF(("htrcntl(TOFD)\n")); { struct timeval tv; svr4_hrt_time_t t; if (SCARG(uap, clk) != SVR4_HRT_CLK_STD) { DPRINTF(("clk == %d\n", SCARG(uap, clk))); return EINVAL; } if (SCARG(uap, ti) == NULL) { DPRINTF(("ti NULL\n")); return EINVAL; } microtime(&tv); t.h_sec = tv.tv_sec; t.h_rem = tv.tv_usec; t.h_res = SVR4_HRT_USEC; return copyout(&t, SCARG(uap, ti), sizeof(t)); } case SVR4_HRT_CNTL_START: DPRINTF(("htrcntl(START)\n")); return ENOSYS; case SVR4_HRT_CNTL_GET: DPRINTF(("htrcntl(GET)\n")); return ENOSYS; default: DPRINTF(("Bad htrcntl command %d\n", SCARG(uap, fun))); return ENOSYS; } } int svr4_sys_hrtsys(p, uap) struct proc *p; struct svr4_sys_hrtsys_args *uap; { int *retval = p->p_retval; switch (SCARG(uap, cmd)) { case SVR4_HRT_CNTL: return svr4_hrtcntl(p, (struct svr4_hrtcntl_args *) uap, retval); case SVR4_HRT_ALRM: 
DPRINTF(("hrtalarm\n")); return ENOSYS; case SVR4_HRT_SLP: DPRINTF(("hrtsleep\n")); return ENOSYS; case SVR4_HRT_CAN: DPRINTF(("hrtcancel\n")); return ENOSYS; default: DPRINTF(("Bad hrtsys command %d\n", SCARG(uap, cmd))); return EINVAL; } } static int svr4_setinfo(p, st, s) struct proc *p; int st; svr4_siginfo_t *s; { svr4_siginfo_t i; int sig; memset(&i, 0, sizeof(i)); i.si_signo = SVR4_SIGCHLD; i.si_errno = 0; /* XXX? */ if (p) { i.si_pid = p->p_pid; mtx_enter(&sched_lock, MTX_SPIN); if (p->p_stat == SZOMB) { i.si_stime = p->p_ru->ru_stime.tv_sec; i.si_utime = p->p_ru->ru_utime.tv_sec; } else { i.si_stime = p->p_stats->p_ru.ru_stime.tv_sec; i.si_utime = p->p_stats->p_ru.ru_utime.tv_sec; } mtx_exit(&sched_lock, MTX_SPIN); } if (WIFEXITED(st)) { i.si_status = WEXITSTATUS(st); i.si_code = SVR4_CLD_EXITED; } else if (WIFSTOPPED(st)) { sig = WSTOPSIG(st); if (sig >= 0 && sig < NSIG) i.si_status = SVR4_BSD2SVR4_SIG(sig); if (i.si_status == SVR4_SIGCONT) i.si_code = SVR4_CLD_CONTINUED; else i.si_code = SVR4_CLD_STOPPED; } else { sig = WTERMSIG(st); if (sig >= 0 && sig < NSIG) i.si_status = SVR4_BSD2SVR4_SIG(sig); if (WCOREDUMP(st)) i.si_code = SVR4_CLD_DUMPED; else i.si_code = SVR4_CLD_KILLED; } DPRINTF(("siginfo [pid %ld signo %d code %d errno %d status %d]\n", i.si_pid, i.si_signo, i.si_code, i.si_errno, i.si_status)); return copyout(&i, s, sizeof(i)); } int svr4_sys_waitsys(p, uap) struct proc *p; struct svr4_sys_waitsys_args *uap; { int nfound; int error, *retval = p->p_retval; struct proc *q, *t; switch (SCARG(uap, grp)) { case SVR4_P_PID: break; case SVR4_P_PGID: SCARG(uap, id) = -p->p_pgid; break; case SVR4_P_ALL: SCARG(uap, id) = WAIT_ANY; break; default: return EINVAL; } DPRINTF(("waitsys(%d, %d, %p, %x)\n", SCARG(uap, grp), SCARG(uap, id), SCARG(uap, info), SCARG(uap, options))); loop: nfound = 0; PROCTREE_LOCK(PT_SHARED); LIST_FOREACH(q, &p->p_children, p_sibling) { if (SCARG(uap, id) != WAIT_ANY && q->p_pid != SCARG(uap, id) && q->p_pgid != -SCARG(uap, id)) 
{ DPRINTF(("pid %d pgid %d != %d\n", q->p_pid, q->p_pgid, SCARG(uap, id))); continue; } nfound++; PROC_LOCK(q); mtx_enter(&sched_lock, MTX_SPIN); if (q->p_stat == SZOMB && ((SCARG(uap, options) & (SVR4_WEXITED|SVR4_WTRAPPED)))) { mtx_exit(&sched_lock, MTX_SPIN); PROC_UNLOCK(q); PROCTREE_LOCK(PT_RELEASE); *retval = 0; DPRINTF(("found %d\n", q->p_pid)); error = svr4_setinfo(q, q->p_xstat, SCARG(uap, info)); if (error != 0) return error; if ((SCARG(uap, options) & SVR4_WNOWAIT)) { DPRINTF(("Don't wait\n")); return 0; } /* * If we got the child via ptrace(2) or procfs, and * the parent is different (meaning the process was * attached, rather than run as a child), then we need * to give it back to the old parent, and send the * parent a SIGCHLD. The rest of the cleanup will be * done when the old parent waits on the child. */ PROC_LOCK(q); if (q->p_flag & P_TRACED) { PROC_UNLOCK(q); PROCTREE_LOCK(PT_EXCLUSIVE); if (q->p_oppid != q->p_pptr->p_pid) { t = pfind(q->p_oppid); proc_reparent(q, t ? t : initproc); PROCTREE_LOCK(PT_RELEASE); PROC_LOCK(q); q->p_oppid = 0; q->p_flag &= ~(P_TRACED | P_WAITED); PROC_UNLOCK(q); PROCTREE_LOCK(PT_SHARED); wakeup((caddr_t)q->p_pptr); PROCTREE_LOCK(PT_RELEASE); return 0; } PROCTREE_LOCK(PT_RELEASE); } else PROC_UNLOCK(q); q->p_xstat = 0; ruadd(&p->p_stats->p_cru, q->p_ru); FREE(q->p_ru, M_ZOMBIE); q->p_ru = 0; /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(q->p_cred->p_uidinfo, -1, 0); /* * Release reference to text vnode. */ if (q->p_textvp) vrele(q->p_textvp); /* * Free up credentials. 
*/ PROC_LOCK(q); if (--q->p_cred->p_refcnt == 0) { crfree(q->p_ucred); uifree(q->p_cred->p_uidinfo); FREE(q->p_cred, M_SUBPROC); q->p_cred = NULL; } /* * Destroy empty prisons */ if (q->p_prison && !--q->p_prison->pr_ref) { if (q->p_prison->pr_linux != NULL) FREE(q->p_prison->pr_linux, M_PRISON); FREE(q->p_prison, M_PRISON); } /* * Remove unused arguments */ if (q->p_args && --q->p_args->ar_ref == 0) FREE(q->p_args, M_PARGS); PROC_UNLOCK(q); /* * Finally finished with old proc entry. * Unlink it from its process group and free it. */ leavepgrp(q); ALLPROC_LOCK(AP_EXCLUSIVE); LIST_REMOVE(q, p_list); /* off zombproc */ ALLPROC_LOCK(AP_RELEASE); PROCTREE_LOCK(PT_EXCLUSIVE); LIST_REMOVE(q, p_sibling); PROCTREE_LOCK(PT_RELEASE); PROC_LOCK(q); if (--q->p_procsig->ps_refcnt == 0) { if (q->p_sigacts != &q->p_addr->u_sigacts) FREE(q->p_sigacts, M_SUBPROC); FREE(q->p_procsig, M_SUBPROC); q->p_procsig = NULL; } PROC_UNLOCK(q); /* * Give machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. 
*/ cpu_wait(q); #if defined(__NetBSD__) pool_put(&proc_pool, q); #endif #ifdef __FreeBSD__ mtx_destroy(&q->p_mtx); zfree(proc_zone, q); #endif nprocs--; return 0; } if (q->p_stat == SSTOP && (q->p_flag & P_WAITED) == 0 && (q->p_flag & P_TRACED || (SCARG(uap, options) & (SVR4_WSTOPPED|SVR4_WCONTINUED)))) { mtx_exit(&sched_lock, MTX_SPIN); DPRINTF(("jobcontrol %d\n", q->p_pid)); if (((SCARG(uap, options) & SVR4_WNOWAIT)) == 0) q->p_flag |= P_WAITED; PROC_UNLOCK(q); *retval = 0; return svr4_setinfo(q, W_STOPCODE(q->p_xstat), SCARG(uap, info)); } mtx_exit(&sched_lock, MTX_SPIN); PROC_UNLOCK(q); } if (nfound == 0) return ECHILD; if (SCARG(uap, options) & SVR4_WNOHANG) { *retval = 0; if ((error = svr4_setinfo(NULL, 0, SCARG(uap, info))) != 0) return error; return 0; } if ((error = tsleep((caddr_t)p, PWAIT | PCATCH, "svr4_wait", 0)) != 0) return error; goto loop; } static void bsd_statfs_to_svr4_statvfs(bfs, sfs) const struct statfs *bfs; struct svr4_statvfs *sfs; { sfs->f_bsize = bfs->f_iosize; /* XXX */ sfs->f_frsize = bfs->f_bsize; sfs->f_blocks = bfs->f_blocks; sfs->f_bfree = bfs->f_bfree; sfs->f_bavail = bfs->f_bavail; sfs->f_files = bfs->f_files; sfs->f_ffree = bfs->f_ffree; sfs->f_favail = bfs->f_ffree; sfs->f_fsid = bfs->f_fsid.val[0]; memcpy(sfs->f_basetype, bfs->f_fstypename, sizeof(sfs->f_basetype)); sfs->f_flag = 0; if (bfs->f_flags & MNT_RDONLY) sfs->f_flag |= SVR4_ST_RDONLY; if (bfs->f_flags & MNT_NOSUID) sfs->f_flag |= SVR4_ST_NOSUID; sfs->f_namemax = MAXNAMLEN; memcpy(sfs->f_fstr, bfs->f_fstypename, sizeof(sfs->f_fstr)); /* XXX */ memset(sfs->f_filler, 0, sizeof(sfs->f_filler)); } static void bsd_statfs_to_svr4_statvfs64(bfs, sfs) const struct statfs *bfs; struct svr4_statvfs64 *sfs; { sfs->f_bsize = bfs->f_iosize; /* XXX */ sfs->f_frsize = bfs->f_bsize; sfs->f_blocks = bfs->f_blocks; sfs->f_bfree = bfs->f_bfree; sfs->f_bavail = bfs->f_bavail; sfs->f_files = bfs->f_files; sfs->f_ffree = bfs->f_ffree; sfs->f_favail = bfs->f_ffree; sfs->f_fsid = 
bfs->f_fsid.val[0]; memcpy(sfs->f_basetype, bfs->f_fstypename, sizeof(sfs->f_basetype)); sfs->f_flag = 0; if (bfs->f_flags & MNT_RDONLY) sfs->f_flag |= SVR4_ST_RDONLY; if (bfs->f_flags & MNT_NOSUID) sfs->f_flag |= SVR4_ST_NOSUID; sfs->f_namemax = MAXNAMLEN; memcpy(sfs->f_fstr, bfs->f_fstypename, sizeof(sfs->f_fstr)); /* XXX */ memset(sfs->f_filler, 0, sizeof(sfs->f_filler)); } int svr4_sys_statvfs(p, uap) struct proc *p; struct svr4_sys_statvfs_args *uap; { struct statfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs sfs; int error; CHECKALTEXIST(p, &sg, SCARG(uap, path)); SCARG(&fs_args, path) = SCARG(uap, path); SCARG(&fs_args, buf) = fs; if ((error = statfs(p, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs(&bfs, &sfs); return copyout(&sfs, SCARG(uap, fs), sizeof(sfs)); } int svr4_sys_fstatvfs(p, uap) struct proc *p; struct svr4_sys_fstatvfs_args *uap; { struct fstatfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs sfs; int error; SCARG(&fs_args, fd) = SCARG(uap, fd); SCARG(&fs_args, buf) = fs; if ((error = fstatfs(p, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs(&bfs, &sfs); return copyout(&sfs, SCARG(uap, fs), sizeof(sfs)); } int svr4_sys_statvfs64(p, uap) struct proc *p; struct svr4_sys_statvfs64_args *uap; { struct statfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs64 sfs; int error; CHECKALTEXIST(p, &sg, SCARG(uap, path)); SCARG(&fs_args, path) = SCARG(uap, path); SCARG(&fs_args, buf) = fs; if ((error = statfs(p, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; 
bsd_statfs_to_svr4_statvfs64(&bfs, &sfs); return copyout(&sfs, SCARG(uap, fs), sizeof(sfs)); } int svr4_sys_fstatvfs64(p, uap) struct proc *p; struct svr4_sys_fstatvfs64_args *uap; { struct fstatfs_args fs_args; caddr_t sg = stackgap_init(); struct statfs *fs = stackgap_alloc(&sg, sizeof(struct statfs)); struct statfs bfs; struct svr4_statvfs64 sfs; int error; SCARG(&fs_args, fd) = SCARG(uap, fd); SCARG(&fs_args, buf) = fs; if ((error = fstatfs(p, &fs_args)) != 0) return error; if ((error = copyin(fs, &bfs, sizeof(bfs))) != 0) return error; bsd_statfs_to_svr4_statvfs64(&bfs, &sfs); return copyout(&sfs, SCARG(uap, fs), sizeof(sfs)); } int svr4_sys_alarm(p, uap) struct proc *p; struct svr4_sys_alarm_args *uap; { int error; struct itimerval *itp, *oitp; struct setitimer_args sa; caddr_t sg = stackgap_init(); itp = stackgap_alloc(&sg, sizeof(*itp)); oitp = stackgap_alloc(&sg, sizeof(*oitp)); timevalclear(&itp->it_interval); itp->it_value.tv_sec = SCARG(uap, sec); itp->it_value.tv_usec = 0; SCARG(&sa, which) = ITIMER_REAL; SCARG(&sa, itv) = itp; SCARG(&sa, oitv) = oitp; error = setitimer(p, &sa); if (error) return error; if (oitp->it_value.tv_usec) oitp->it_value.tv_sec++; p->p_retval[0] = oitp->it_value.tv_sec; return 0; } int svr4_sys_gettimeofday(p, uap) struct proc *p; struct svr4_sys_gettimeofday_args *uap; { if (SCARG(uap, tp)) { struct timeval atv; microtime(&atv); return copyout(&atv, SCARG(uap, tp), sizeof (atv)); } return 0; } int svr4_sys_facl(p, uap) struct proc *p; struct svr4_sys_facl_args *uap; { int *retval; retval = p->p_retval; *retval = 0; switch (SCARG(uap, cmd)) { case SVR4_SYS_SETACL: /* We don't support acls on any filesystem */ return ENOSYS; case SVR4_SYS_GETACL: return copyout(retval, &SCARG(uap, num), sizeof(SCARG(uap, num))); case SVR4_SYS_GETACLCNT: return 0; default: return EINVAL; } } int svr4_sys_acl(p, uap) struct proc *p; struct svr4_sys_acl_args *uap; { /* XXX: for now the same */ return svr4_sys_facl(p, (struct svr4_sys_facl_args 
*)uap); } int svr4_sys_auditsys(p, uap) struct proc *p; struct svr4_sys_auditsys_args *uap; { /* * XXX: Big brother is *not* watching. */ return 0; } int svr4_sys_memcntl(p, uap) struct proc *p; struct svr4_sys_memcntl_args *uap; { switch (SCARG(uap, cmd)) { case SVR4_MC_SYNC: { struct msync_args msa; SCARG(&msa, addr) = SCARG(uap, addr); SCARG(&msa, len) = SCARG(uap, len); SCARG(&msa, flags) = (int)SCARG(uap, arg); return msync(p, &msa); } case SVR4_MC_ADVISE: { struct madvise_args maa; SCARG(&maa, addr) = SCARG(uap, addr); SCARG(&maa, len) = SCARG(uap, len); SCARG(&maa, behav) = (int)SCARG(uap, arg); return madvise(p, &maa); } case SVR4_MC_LOCK: case SVR4_MC_UNLOCK: case SVR4_MC_LOCKAS: case SVR4_MC_UNLOCKAS: return EOPNOTSUPP; default: return ENOSYS; } } int svr4_sys_nice(p, uap) struct proc *p; struct svr4_sys_nice_args *uap; { struct setpriority_args ap; int error; SCARG(&ap, which) = PRIO_PROCESS; SCARG(&ap, who) = 0; SCARG(&ap, prio) = SCARG(uap, prio); if ((error = setpriority(p, &ap)) != 0) return error; /* the cast is stupid, but the structures are the same */ if ((error = getpriority(p, (struct getpriority_args *)&ap)) != 0) return error; return 0; } int svr4_sys_resolvepath(p, uap) struct proc *p; struct svr4_sys_resolvepath_args *uap; { struct nameidata nd; int error, *retval = p->p_retval; NDINIT(&nd, LOOKUP, NOFOLLOW | SAVENAME, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return error; if ((error = copyout(nd.ni_cnd.cn_pnbuf, SCARG(uap, buf), SCARG(uap, bufsiz))) != 0) goto bad; *retval = strlen(nd.ni_cnd.cn_pnbuf) < SCARG(uap, bufsiz) ? 
strlen(nd.ni_cnd.cn_pnbuf) + 1 : SCARG(uap, bufsiz); bad: NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); return error; } Index: head/sys/dev/ccd/ccd.c =================================================================== --- head/sys/dev/ccd/ccd.c (revision 71698) +++ head/sys/dev/ccd/ccd.c (revision 71699) @@ -1,1755 +1,1727 @@ /* $FreeBSD$ */ /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ /* * Copyright (c) 1995 Jason R. Thorpe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project * by Jason R. Thorpe. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: cd.c 1.6 90/11/28$ * * @(#)cd.c 8.2 (Berkeley) 11/16/93 */ /* * "Concatenated" disk driver. * * Dynamic configuration and disklabel support by: * Jason R. Thorpe * Numerical Aerodynamic Simulation Facility * Mail Stop 258-6 * NASA Ames Research Center * Moffett Field, CA 94035 */ #include "ccd.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(CCDDEBUG) && !defined(DEBUG) #define DEBUG #endif #ifdef DEBUG #define CCDB_FOLLOW 0x01 #define CCDB_INIT 0x02 #define CCDB_IO 0x04 #define CCDB_LABEL 0x08 #define CCDB_VNODE 0x10 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | CCDB_VNODE; SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); #undef DEBUG #endif #define ccdunit(x) dkunit(x) #define ccdpart(x) dkpart(x) /* This is how mirroring works (only writes are special): When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s linked together by the cb_mirror field. "cb_pflags & CCDPF_MIRROR_DONE" is set to 0 on both of them. When a component returns to ccdiodone(), it checks if "cb_pflags & CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's flag and returns. If it is, it means its partner has already returned, so it will go to the regular cleanup. */ struct ccdbuf { struct bio cb_buf; /* new I/O buf */ struct bio *cb_obp; /* ptr. 
to original I/O buf */ struct ccdbuf *cb_freenext; /* free list link */ int cb_unit; /* target unit */ int cb_comp; /* target component */ int cb_pflags; /* mirror/parity status flag */ struct ccdbuf *cb_mirror; /* mirror counterpart */ }; /* bits in cb_pflags */ #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ #define CCDLABELDEV(dev) \ (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) static d_open_t ccdopen; static d_close_t ccdclose; static d_strategy_t ccdstrategy; static d_ioctl_t ccdioctl; static d_dump_t ccddump; static d_psize_t ccdsize; #define NCCDFREEHIWAT 16 #define CDEV_MAJOR 74 #define BDEV_MAJOR 21 static struct cdevsw ccd_cdevsw = { /* open */ ccdopen, /* close */ ccdclose, /* read */ physread, /* write */ physwrite, /* ioctl */ ccdioctl, /* poll */ nopoll, /* mmap */ nommap, /* strategy */ ccdstrategy, /* name */ "ccd", /* maj */ CDEV_MAJOR, /* dump */ ccddump, /* psize */ ccdsize, /* flags */ D_DISK, /* bmaj */ BDEV_MAJOR }; /* called during module initialization */ static void ccdattach __P((void)); static int ccd_modevent __P((module_t, int, void *)); /* called by biodone() at interrupt time */ static void ccdiodone __P((struct bio *bp)); static void ccdstart __P((struct ccd_softc *, struct bio *)); static void ccdinterleave __P((struct ccd_softc *, int)); static void ccdintr __P((struct ccd_softc *, struct bio *)); static int ccdinit __P((struct ccddevice *, char **, struct proc *)); static int ccdlookup __P((char *, struct proc *p, struct vnode **)); static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, struct bio *, daddr_t, caddr_t, long)); static void ccdgetdisklabel __P((dev_t)); static void ccdmakedisklabel __P((struct ccd_softc *)); static int ccdlock __P((struct ccd_softc *)); static void ccdunlock __P((struct ccd_softc *)); #ifdef DEBUG static void printiinfo __P((struct ccdiinfo *)); #endif /* Non-private for the benefit of libkvm. 
*/ struct ccd_softc *ccd_softc; struct ccddevice *ccddevs; struct ccdbuf *ccdfreebufs; static int numccdfreebufs; static int numccd = 0; /* * getccdbuf() - Allocate and zero a ccd buffer. * * This routine is called at splbio(). */ static __inline struct ccdbuf * getccdbuf(struct ccdbuf *cpy) { struct ccdbuf *cbp; /* * Allocate from freelist or malloc as necessary */ if ((cbp = ccdfreebufs) != NULL) { ccdfreebufs = cbp->cb_freenext; --numccdfreebufs; } else { cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); } /* * Used by mirroring code */ if (cpy) bcopy(cpy, cbp, sizeof(struct ccdbuf)); else bzero(cbp, sizeof(struct ccdbuf)); /* * independant struct bio initialization */ return(cbp); } /* * putccdbuf() - Free a ccd buffer. * * This routine is called at splbio(). */ static __inline void putccdbuf(struct ccdbuf *cbp) { if (numccdfreebufs < NCCDFREEHIWAT) { cbp->cb_freenext = ccdfreebufs; ccdfreebufs = cbp; ++numccdfreebufs; } else { free((caddr_t)cbp, M_DEVBUF); } } /* * Number of blocks to untouched in front of a component partition. * This is to avoid violating its disklabel area when it starts at the * beginning of the slice. */ #if !defined(CCD_OFFSET) #define CCD_OFFSET 16 #endif static void ccd_clone(void *arg, char *name, int namelen, dev_t *dev) { int i, u; char *s; if (*dev != NODEV) return; i = dev_stdclone(name, &s, "ccd", &u); if (i != 2) return; if (u >= numccd) return; if (*s <= 'a' || *s >= 'h') return; if (s[1] != '\0') return; *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', UID_ROOT, GID_OPERATOR, 0640, name); } /* * Called by main() during pseudo-device attachment. All we need * to do is allocate enough space for devices to be configured later, and * add devsw entries. 
*/ static void ccdattach() { int i; int num = NCCD; if (num > 1) printf("ccd0-%d: Concatenated disk drivers\n", num-1); else printf("ccd0: Concatenated disk driver\n"); ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), M_DEVBUF, M_NOWAIT); ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), M_DEVBUF, M_NOWAIT); if ((ccd_softc == NULL) || (ccddevs == NULL)) { printf("WARNING: no memory for concatenated disks\n"); if (ccd_softc != NULL) free(ccd_softc, M_DEVBUF); if (ccddevs != NULL) free(ccddevs, M_DEVBUF); return; } numccd = num; bzero(ccd_softc, num * sizeof(struct ccd_softc)); bzero(ccddevs, num * sizeof(struct ccddevice)); cdevsw_add(&ccd_cdevsw); /* XXX: is this necessary? */ for (i = 0; i < numccd; ++i) ccddevs[i].ccd_dk = -1; EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); } static int ccd_modevent(mod, type, data) module_t mod; int type; void *data; { int error = 0; switch (type) { case MOD_LOAD: ccdattach(); break; case MOD_UNLOAD: printf("ccd0: Unload not supported!\n"); error = EOPNOTSUPP; break; default: /* MOD_SHUTDOWN etc */ break; } return (error); } DEV_MODULE(ccd, ccd_modevent, NULL); static int ccdinit(ccd, cpaths, p) struct ccddevice *ccd; char **cpaths; struct proc *p; { struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; struct ccdcinfo *ci = NULL; /* XXX */ size_t size; int ix; struct vnode *vp; - struct ucred *uc; size_t minsize; int maxsecsize; struct partinfo dpart; struct ccdgeom *ccg = &cs->sc_geom; char tmppath[MAXPATHLEN]; int error = 0; #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccdinit: unit %d\n", ccd->ccd_unit); #endif cs->sc_size = 0; cs->sc_ileave = ccd->ccd_interleave; cs->sc_nccdisks = ccd->ccd_ndev; /* Allocate space for the component info. */ cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), M_DEVBUF, M_WAITOK); /* * Verify that each component piece exists and record * relevant information about it. 
*/ maxsecsize = 0; minsize = 0; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (ix = 0; ix < cs->sc_nccdisks; ix++) { vp = ccd->ccd_vpp[ix]; ci = &cs->sc_cinfo[ix]; ci->ci_vp = vp; /* * Copy in the pathname of the component. */ bzero(tmppath, sizeof(tmppath)); /* sanity */ if ((error = copyinstr(cpaths[ix], tmppath, MAXPATHLEN, &ci->ci_pathlen)) != 0) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: can't copy path, error = %d\n", ccd->ccd_unit, error); #endif goto fail; } ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); bcopy(tmppath, ci->ci_path, ci->ci_pathlen); ci->ci_dev = vn_todev(vp); /* * Get partition information for the component. */ if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, - FREAD, uc, p)) != 0) { + FREAD, p->p_ucred, p)) != 0) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: %s: ioctl failed, error = %d\n", ccd->ccd_unit, ci->ci_path, error); #endif goto fail; } if (dpart.part->p_fstype == FS_BSDFFS) { maxsecsize = ((dpart.disklab->d_secsize > maxsecsize) ? dpart.disklab->d_secsize : maxsecsize); size = dpart.part->p_size - CCD_OFFSET; } else { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: %s: incorrect partition type\n", ccd->ccd_unit, ci->ci_path); #endif error = EFTYPE; goto fail; } /* * Calculate the size, truncating to an interleave * boundary if necessary. */ if (cs->sc_ileave > 1) size -= size % cs->sc_ileave; if (size == 0) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: %s: size == 0\n", ccd->ccd_unit, ci->ci_path); #endif error = ENODEV; goto fail; } if (minsize == 0 || size < minsize) minsize = size; ci->ci_size = size; cs->sc_size += size; } - crfree(uc); /* * Don't allow the interleave to be smaller than * the biggest component sector. 
*/ if ((cs->sc_ileave > 0) && (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: interleave must be at least %d\n", ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); #endif error = EINVAL; goto fail; } /* * If uniform interleave is desired set all sizes to that of * the smallest component. This will guarentee that a single * interleave table is generated. * * Lost space must be taken into account when calculating the * overall size. Half the space is lost when CCDF_MIRROR is * specified. One disk is lost when CCDF_PARITY is specified. */ if (ccd->ccd_flags & CCDF_UNIFORM) { for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { ci->ci_size = minsize; } if (ccd->ccd_flags & CCDF_MIRROR) { /* * Check to see if an even number of components * have been specified. The interleave must also * be non-zero in order for us to be able to * guarentee the topology. */ if (cs->sc_nccdisks % 2) { printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); error = EINVAL; goto fail; } if (cs->sc_ileave == 0) { printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); error = EINVAL; goto fail; } cs->sc_size = (cs->sc_nccdisks/2) * minsize; } else if (ccd->ccd_flags & CCDF_PARITY) { cs->sc_size = (cs->sc_nccdisks-1) * minsize; } else { if (cs->sc_ileave == 0) { printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); error = EINVAL; goto fail; } cs->sc_size = cs->sc_nccdisks * minsize; } } /* * Construct the interleave table. */ ccdinterleave(cs, ccd->ccd_unit); /* * Create pseudo-geometry based on 1MB cylinders. It's * pretty close. */ ccg->ccg_secsize = maxsecsize; ccg->ccg_ntracks = 1; ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; /* * Add an devstat entry for this device. 
*/ devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_ARRAY); cs->sc_flags |= CCDF_INITED; cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ cs->sc_unit = ccd->ccd_unit; return (0); fail: - crfree(uc); while (ci > cs->sc_cinfo) { ci--; free(ci->ci_path, M_DEVBUF); } free(cs->sc_cinfo, M_DEVBUF); return (error); } static void ccdinterleave(cs, unit) struct ccd_softc *cs; int unit; { struct ccdcinfo *ci, *smallci; struct ccdiinfo *ii; daddr_t bn, lbn; int ix; u_long size; #ifdef DEBUG if (ccddebug & CCDB_INIT) printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); #endif /* * Allocate an interleave table. The worst case occurs when each * of N disks is of a different size, resulting in N interleave * tables. * * Chances are this is too big, but we don't care. */ size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); /* * Trivial case: no interleave (actually interleave of disk size). * Each table entry represents a single component in its entirety. * * An interleave of 0 may not be used with a mirror or parity setup. */ if (cs->sc_ileave == 0) { bn = 0; ii = cs->sc_itable; for (ix = 0; ix < cs->sc_nccdisks; ix++) { /* Allocate space for ii_index. */ ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); ii->ii_ndisk = 1; ii->ii_startblk = bn; ii->ii_startoff = 0; ii->ii_index[0] = ix; bn += cs->sc_cinfo[ix].ci_size; ii++; } ii->ii_ndisk = 0; #ifdef DEBUG if (ccddebug & CCDB_INIT) printiinfo(cs->sc_itable); #endif return; } /* * The following isn't fast or pretty; it doesn't have to be. */ size = 0; bn = lbn = 0; for (ii = cs->sc_itable; ; ii++) { /* * Allocate space for ii_index. We might allocate more then * we use. 
*/ ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), M_DEVBUF, M_WAITOK); /* * Locate the smallest of the remaining components */ smallci = NULL; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { if (ci->ci_size > size && (smallci == NULL || ci->ci_size < smallci->ci_size)) { smallci = ci; } } /* * Nobody left, all done */ if (smallci == NULL) { ii->ii_ndisk = 0; break; } /* * Record starting logical block using an sc_ileave blocksize. */ ii->ii_startblk = bn / cs->sc_ileave; /* * Record starting comopnent block using an sc_ileave * blocksize. This value is relative to the beginning of * a component disk. */ ii->ii_startoff = lbn; /* * Determine how many disks take part in this interleave * and record their indices. */ ix = 0; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { if (ci->ci_size >= smallci->ci_size) { ii->ii_index[ix++] = ci - cs->sc_cinfo; } } ii->ii_ndisk = ix; bn += ix * (smallci->ci_size - size); lbn = smallci->ci_size / cs->sc_ileave; size = smallci->ci_size; } #ifdef DEBUG if (ccddebug & CCDB_INIT) printiinfo(cs->sc_itable); #endif } /* ARGSUSED */ static int ccdopen(dev, flags, fmt, p) dev_t dev; int flags, fmt; struct proc *p; { int unit = ccdunit(dev); struct ccd_softc *cs; struct disklabel *lp; int error = 0, part, pmask; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdopen(%x, %x)\n", dev, flags); #endif if (unit >= numccd) return (ENXIO); cs = &ccd_softc[unit]; if ((error = ccdlock(cs)) != 0) return (error); lp = &cs->sc_label; part = ccdpart(dev); pmask = (1 << part); /* * If we're initialized, check to see if there are any other * open partitions. If not, then it's safe to update * the in-core disklabel. */ if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) ccdgetdisklabel(dev); /* Check that the partition exists. 
*/ if (part != RAW_PART && ((part >= lp->d_npartitions) || (lp->d_partitions[part].p_fstype == FS_UNUSED))) { error = ENXIO; goto done; } cs->sc_openmask |= pmask; done: ccdunlock(cs); return (0); } /* ARGSUSED */ static int ccdclose(dev, flags, fmt, p) dev_t dev; int flags, fmt; struct proc *p; { int unit = ccdunit(dev); struct ccd_softc *cs; int error = 0, part; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdclose(%x, %x)\n", dev, flags); #endif if (unit >= numccd) return (ENXIO); cs = &ccd_softc[unit]; if ((error = ccdlock(cs)) != 0) return (error); part = ccdpart(dev); /* ...that much closer to allowing unconfiguration... */ cs->sc_openmask &= ~(1 << part); ccdunlock(cs); return (0); } static void ccdstrategy(bp) struct bio *bp; { int unit = ccdunit(bp->bio_dev); struct ccd_softc *cs = &ccd_softc[unit]; int s; int wlabel; struct disklabel *lp; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdstrategy(%x): unit %d\n", bp, unit); #endif if ((cs->sc_flags & CCDF_INITED) == 0) { bp->bio_error = ENXIO; bp->bio_flags |= BIO_ERROR; goto done; } /* If it's a nil transfer, wake up the top half now. */ if (bp->bio_bcount == 0) goto done; lp = &cs->sc_label; /* * Do bounds checking and adjust transfer. If there's an * error, the bounds check will flag that for us. */ wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); if (ccdpart(bp->bio_dev) != RAW_PART) { if (bounds_check_with_label(bp, lp, wlabel) <= 0) goto done; } else { int pbn; /* in sc_secsize chunks */ long sz; /* in sc_secsize chunks */ pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); /* * If out of bounds return an error. If at the EOF point, * simply read or write less. */ if (pbn < 0 || pbn >= cs->sc_size) { bp->bio_resid = bp->bio_bcount; if (pbn != cs->sc_size) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; } goto done; } /* * If the request crosses EOF, truncate the request. 
*/ if (pbn + sz > cs->sc_size) { bp->bio_bcount = (cs->sc_size - pbn) * cs->sc_geom.ccg_secsize; } } bp->bio_resid = bp->bio_bcount; /* * "Start" the unit. */ s = splbio(); ccdstart(cs, bp); splx(s); return; done: biodone(bp); } static void ccdstart(cs, bp) struct ccd_softc *cs; struct bio *bp; { long bcount, rcount; struct ccdbuf *cbp[4]; /* XXX! : 2 reads and 2 writes for RAID 4/5 */ caddr_t addr; daddr_t bn; struct partition *pp; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdstart(%x, %x)\n", cs, bp); #endif /* Record the transaction start */ devstat_start_transaction(&cs->device_stats); /* * Translate the partition-relative block number to an absolute. */ bn = bp->bio_blkno; if (ccdpart(bp->bio_dev) != RAW_PART) { pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; bn += pp->p_offset; } /* * Allocate component buffers and fire off the requests */ addr = bp->bio_data; for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { ccdbuffer(cbp, cs, bp, bn, addr, bcount); rcount = cbp[0]->cb_buf.bio_bcount; if (cs->sc_cflags & CCDF_MIRROR) { /* * Mirroring. Writes go to both disks, reads are * taken from whichever disk seems most appropriate. * * We attempt to localize reads to the disk whos arm * is nearest the read request. We ignore seeks due * to writes when making this determination and we * also try to avoid hogging. */ if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { BIO_STRATEGY(&cbp[0]->cb_buf, 0); BIO_STRATEGY(&cbp[1]->cb_buf, 0); } else { int pick = cs->sc_pick; daddr_t range = cs->sc_size / 16; if (bn < cs->sc_blk[pick] - range || bn > cs->sc_blk[pick] + range ) { cs->sc_pick = pick = 1 - pick; } cs->sc_blk[pick] = bn + btodb(rcount); BIO_STRATEGY(&cbp[pick]->cb_buf, 0); } } else { /* * Not mirroring */ BIO_STRATEGY(&cbp[0]->cb_buf, 0); } bn += btodb(rcount); addr += rcount; } } /* * Build a component buffer header. 
*/ static void ccdbuffer(cb, cs, bp, bn, addr, bcount) struct ccdbuf **cb; struct ccd_softc *cs; struct bio *bp; daddr_t bn; caddr_t addr; long bcount; { struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ struct ccdbuf *cbp; daddr_t cbn, cboff; off_t cbc; #ifdef DEBUG if (ccddebug & CCDB_IO) printf("ccdbuffer(%x, %x, %d, %x, %d)\n", cs, bp, bn, addr, bcount); #endif /* * Determine which component bn falls in. */ cbn = bn; cboff = 0; if (cs->sc_ileave == 0) { /* * Serially concatenated and neither a mirror nor a parity * config. This is a special case. */ daddr_t sblk; sblk = 0; for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) sblk += ci->ci_size; cbn -= sblk; } else { struct ccdiinfo *ii; int ccdisk, off; /* * Calculate cbn, the logical superblock (sc_ileave chunks), * and cboff, a normal block offset (DEV_BSIZE chunks) relative * to cbn. */ cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ /* * Figure out which interleave table to use. */ for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { if (ii->ii_startblk > cbn) break; } ii--; /* * off is the logical superblock relative to the beginning * of this interleave block. */ off = cbn - ii->ii_startblk; /* * We must calculate which disk component to use (ccdisk), * and recalculate cbn to be the superblock relative to * the beginning of the component. This is typically done by * adding 'off' and ii->ii_startoff together. However, 'off' * must typically be divided by the number of components in * this interleave array to be properly convert it from a * CCD-relative logical superblock number to a * component-relative superblock number. */ if (ii->ii_ndisk == 1) { /* * When we have just one disk, it can't be a mirror * or a parity config. */ ccdisk = ii->ii_index[0]; cbn = ii->ii_startoff + off; } else { if (cs->sc_cflags & CCDF_MIRROR) { /* * We have forced a uniform mapping, resulting * in a single interleave array. 
We double * up on the first half of the available * components and our mirror is in the second * half. This only works with a single * interleave array because doubling up * doubles the number of sectors, so there * cannot be another interleave array because * the next interleave array's calculations * would be off. */ int ndisk2 = ii->ii_ndisk / 2; ccdisk = ii->ii_index[off % ndisk2]; cbn = ii->ii_startoff + off / ndisk2; ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; } else if (cs->sc_cflags & CCDF_PARITY) { /* * XXX not implemented yet */ int ndisk2 = ii->ii_ndisk - 1; ccdisk = ii->ii_index[off % ndisk2]; cbn = ii->ii_startoff + off / ndisk2; if (cbn % ii->ii_ndisk <= ccdisk) ccdisk++; } else { ccdisk = ii->ii_index[off % ii->ii_ndisk]; cbn = ii->ii_startoff + off / ii->ii_ndisk; } } ci = &cs->sc_cinfo[ccdisk]; /* * Convert cbn from a superblock to a normal block so it * can be used to calculate (along with cboff) the normal * block index into this particular disk. */ cbn *= cs->sc_ileave; } /* * Fill in the component buf structure. */ cbp = getccdbuf(NULL); cbp->cb_buf.bio_cmd = bp->bio_cmd; cbp->cb_buf.bio_done = ccdiodone; cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); cbp->cb_buf.bio_data = addr; if (cs->sc_ileave == 0) cbc = dbtob((off_t)(ci->ci_size - cbn)); else cbc = dbtob((off_t)(cs->sc_ileave - cboff)); cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; /* * context for ccdiodone */ cbp->cb_obp = bp; cbp->cb_unit = cs - ccd_softc; cbp->cb_comp = ci - cs->sc_cinfo; #ifdef DEBUG if (ccddebug & CCDB_IO) printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount); #endif cb[0] = cbp; /* * Note: both I/O's setup when reading from mirror, but only one * will be executed. 
*/ if (cs->sc_cflags & CCDF_MIRROR) { /* mirror, setup second I/O */ cbp = getccdbuf(cb[0]); cbp->cb_buf.bio_dev = ci2->ci_dev; cbp->cb_comp = ci2 - cs->sc_cinfo; cb[1] = cbp; /* link together the ccdbuf's and clear "mirror done" flag */ cb[0]->cb_mirror = cb[1]; cb[1]->cb_mirror = cb[0]; cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; } } static void ccdintr(cs, bp) struct ccd_softc *cs; struct bio *bp; { #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdintr(%x, %x)\n", cs, bp); #endif /* * Request is done for better or worse, wakeup the top half. */ if (bp->bio_flags & BIO_ERROR) bp->bio_resid = bp->bio_bcount; devstat_end_transaction_bio(&cs->device_stats, bp); biodone(bp); } /* * Called at interrupt time. * Mark the component as done and if all components are done, * take a ccd interrupt. */ static void ccdiodone(ibp) struct bio *ibp; { struct ccdbuf *cbp = (struct ccdbuf *)ibp; struct bio *bp = cbp->cb_obp; int unit = cbp->cb_unit; int count, s; s = splbio(); #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdiodone(%x)\n", cbp); if (ccddebug & CCDB_IO) { printf("ccdiodone: bp %x bcount %d resid %d\n", bp, bp->bio_bcount, bp->bio_resid); printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount); } #endif /* * If an error occured, report it. If this is a mirrored * configuration and the first of two possible reads, do not * set the error in the bp yet because the second read may * succeed. */ if (cbp->cb_buf.bio_flags & BIO_ERROR) { const char *msg = ""; if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && (cbp->cb_buf.bio_cmd == BIO_READ) && (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { /* * We will try our read on the other disk down * below, also reverse the default pick so if we * are doing a scan we do not keep hitting the * bad disk first. 
*/ struct ccd_softc *cs = &ccd_softc[unit]; msg = ", trying other disk"; cs->sc_pick = 1 - cs->sc_pick; cs->sc_blk[cs->sc_pick] = bp->bio_blkno; } else { bp->bio_flags |= BIO_ERROR; bp->bio_error = cbp->cb_buf.bio_error ? cbp->cb_buf.bio_error : EIO; } printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", unit, bp->bio_error, cbp->cb_comp, (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); } /* * Process mirror. If we are writing, I/O has been initiated on both * buffers and we fall through only after both are finished. * * If we are reading only one I/O is initiated at a time. If an * error occurs we initiate the second I/O and return, otherwise * we free the second I/O without initiating it. */ if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { if (cbp->cb_buf.bio_cmd == BIO_WRITE) { /* * When writing, handshake with the second buffer * to determine when both are done. If both are not * done, return here. */ if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; putccdbuf(cbp); splx(s); return; } } else { /* * When reading, either dispose of the second buffer * or initiate I/O on the second buffer if an error * occured with this one. */ if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { if (cbp->cb_buf.bio_flags & BIO_ERROR) { cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); putccdbuf(cbp); splx(s); return; } else { putccdbuf(cbp->cb_mirror); /* fall through */ } } } } /* * use bio_caller1 to determine how big the original request was rather * then bio_bcount, because bio_bcount may have been truncated for EOF. * * XXX We check for an error, but we do not test the resid for an * aligned EOF condition. This may result in character & block * device access not recognizing EOF properly when read or written * sequentially, but will not effect filesystems. */ count = (long)cbp->cb_buf.bio_caller1; putccdbuf(cbp); /* * If all done, "interrupt". 
*/ bp->bio_resid -= count; if (bp->bio_resid < 0) panic("ccdiodone: count"); if (bp->bio_resid == 0) ccdintr(&ccd_softc[unit], bp); splx(s); } static int ccdioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { int unit = ccdunit(dev); int i, j, lookedup = 0, error = 0; int part, pmask, s; struct ccd_softc *cs; struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; struct ccddevice ccd; char **cpp; struct vnode **vpp; - struct ucred *uc; if (unit >= numccd) return (ENXIO); cs = &ccd_softc[unit]; bzero(&ccd, sizeof(ccd)); switch (cmd) { case CCDIOCSET: if (cs->sc_flags & CCDF_INITED) return (EBUSY); if ((flag & FWRITE) == 0) return (EBADF); if ((error = ccdlock(cs)) != 0) return (error); /* Fill in some important bits. */ ccd.ccd_unit = unit; ccd.ccd_interleave = ccio->ccio_ileave; if (ccd.ccd_interleave == 0 && ((ccio->ccio_flags & CCDF_MIRROR) || (ccio->ccio_flags & CCDF_PARITY))) { printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); } if ((ccio->ccio_flags & CCDF_MIRROR) && (ccio->ccio_flags & CCDF_PARITY)) { printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); ccio->ccio_flags &= ~CCDF_PARITY; } if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && !(ccio->ccio_flags & CCDF_UNIFORM)) { printf("ccd%d: mirror/parity forces uniform flag\n", unit); ccio->ccio_flags |= CCDF_UNIFORM; } ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; /* * Allocate space for and copy in the array of * componet pathnames and device numbers. 
*/ cpp = malloc(ccio->ccio_ndisks * sizeof(char *), M_DEVBUF, M_WAITOK); vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), M_DEVBUF, M_WAITOK); error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, ccio->ccio_ndisks * sizeof(char **)); if (error) { free(vpp, M_DEVBUF); free(cpp, M_DEVBUF); ccdunlock(cs); return (error); } #ifdef DEBUG if (ccddebug & CCDB_INIT) for (i = 0; i < ccio->ccio_ndisks; ++i) printf("ccdioctl: component %d: 0x%x\n", i, cpp[i]); #endif for (i = 0; i < ccio->ccio_ndisks; ++i) { #ifdef DEBUG if (ccddebug & CCDB_INIT) printf("ccdioctl: lookedup = %d\n", lookedup); #endif if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (j = 0; j < lookedup; ++j) (void)vn_close(vpp[j], FREAD|FWRITE, - uc, p); + p->p_ucred, p); free(vpp, M_DEVBUF); free(cpp, M_DEVBUF); ccdunlock(cs); - crfree(uc); return (error); } ++lookedup; } ccd.ccd_cpp = cpp; ccd.ccd_vpp = vpp; ccd.ccd_ndev = ccio->ccio_ndisks; /* * Initialize the ccd. Fills in the softc for us. */ if ((error = ccdinit(&ccd, cpp, p)) != 0) { - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (j = 0; j < lookedup; ++j) - (void)vn_close(vpp[j], FREAD|FWRITE, uc, p); + (void)vn_close(vpp[j], FREAD|FWRITE, + p->p_ucred, p); bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); free(vpp, M_DEVBUF); free(cpp, M_DEVBUF); ccdunlock(cs); - crfree(uc); return (error); } /* * The ccd has been successfully initialized, so * we can place it into the array and read the disklabel. 
*/ bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); ccio->ccio_unit = unit; ccio->ccio_size = cs->sc_size; ccdgetdisklabel(dev); ccdunlock(cs); break; case CCDIOCCLR: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); if ((flag & FWRITE) == 0) return (EBADF); if ((error = ccdlock(cs)) != 0) return (error); /* Don't unconfigure if any other partitions are open */ part = ccdpart(dev); pmask = (1 << part); if ((cs->sc_openmask & ~pmask)) { ccdunlock(cs); return (EBUSY); } /* * Free ccd_softc information and clear entry. */ /* Close the components and free their pathnames. */ - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (i = 0; i < cs->sc_nccdisks; ++i) { /* * XXX: this close could potentially fail and * cause Bad Things. Maybe we need to force * the close to happen? */ #ifdef DEBUG if (ccddebug & CCDB_VNODE) vprint("CCDIOCCLR: vnode info", cs->sc_cinfo[i].ci_vp); #endif (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, - uc, p); + p->p_ucred, p); free(cs->sc_cinfo[i].ci_path, M_DEVBUF); } - crfree(uc); /* Free interleave index. */ for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) free(cs->sc_itable[i].ii_index, M_DEVBUF); /* Free component info and interleave table. */ free(cs->sc_cinfo, M_DEVBUF); free(cs->sc_itable, M_DEVBUF); cs->sc_flags &= ~CCDF_INITED; /* * Free ccddevice information and clear entry. */ free(ccddevs[unit].ccd_cpp, M_DEVBUF); free(ccddevs[unit].ccd_vpp, M_DEVBUF); ccd.ccd_dk = -1; bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); /* * And remove the devstat entry. */ devstat_remove_entry(&cs->device_stats); /* This must be atomic. 
*/ s = splhigh(); ccdunlock(cs); bzero(cs, sizeof(struct ccd_softc)); splx(s); break; case DIOCGDINFO: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); *(struct disklabel *)data = cs->sc_label; break; case DIOCGPART: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); ((struct partinfo *)data)->disklab = &cs->sc_label; ((struct partinfo *)data)->part = &cs->sc_label.d_partitions[ccdpart(dev)]; break; case DIOCWDINFO: case DIOCSDINFO: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); if ((flag & FWRITE) == 0) return (EBADF); if ((error = ccdlock(cs)) != 0) return (error); cs->sc_flags |= CCDF_LABELLING; error = setdisklabel(&cs->sc_label, (struct disklabel *)data, 0); if (error == 0) { if (cmd == DIOCWDINFO) error = writedisklabel(CCDLABELDEV(dev), &cs->sc_label); } cs->sc_flags &= ~CCDF_LABELLING; ccdunlock(cs); if (error) return (error); break; case DIOCWLABEL: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); if ((flag & FWRITE) == 0) return (EBADF); if (*(int *)data != 0) cs->sc_flags |= CCDF_WLABEL; else cs->sc_flags &= ~CCDF_WLABEL; break; default: return (ENOTTY); } return (0); } static int ccdsize(dev) dev_t dev; { struct ccd_softc *cs; int part, size; if (ccdopen(dev, 0, S_IFCHR, curproc)) return (-1); cs = &ccd_softc[ccdunit(dev)]; part = ccdpart(dev); if ((cs->sc_flags & CCDF_INITED) == 0) return (-1); if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) size = -1; else size = cs->sc_label.d_partitions[part].p_size; if (ccdclose(dev, 0, S_IFCHR, curproc)) return (-1); return (size); } static int ccddump(dev) dev_t dev; { /* Not implemented. */ return ENXIO; } /* * Lookup the provided name in the filesystem. If the file exists, * is a valid block device, and isn't being used by anyone else, * set *vpp to the file's vnode. 
*/ static int ccdlookup(path, p, vpp) char *path; struct proc *p; struct vnode **vpp; /* result */ { struct nameidata nd; struct vnode *vp; - struct ucred *uc; int error, flags; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); flags = FREAD | FWRITE; if ((error = vn_open(&nd, &flags, 0)) != 0) { #ifdef DEBUG if (ccddebug & CCDB_FOLLOW|CCDB_INIT) printf("ccdlookup: vn_open error = %d\n", error); #endif return (error); } vp = nd.ni_vp; if (vp->v_usecount > 1) { error = EBUSY; goto bad; } if (!vn_isdisk(vp, &error)) goto bad; #ifdef DEBUG if (ccddebug & CCDB_VNODE) vprint("ccdlookup: vnode info", vp); #endif VOP_UNLOCK(vp, 0, p); NDFREE(&nd, NDF_ONLY_PNBUF); *vpp = vp; return (0); bad: VOP_UNLOCK(vp, 0, p); NDFREE(&nd, NDF_ONLY_PNBUF); /* vn_close does vrele() for vp */ - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - (void)vn_close(vp, FREAD|FWRITE, uc, p); - crfree(uc); + (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (error); } /* * Read the disklabel from the ccd. If one is not present, fake one * up. 
*/ static void ccdgetdisklabel(dev) dev_t dev; { int unit = ccdunit(dev); struct ccd_softc *cs = &ccd_softc[unit]; char *errstring; struct disklabel *lp = &cs->sc_label; struct ccdgeom *ccg = &cs->sc_geom; bzero(lp, sizeof(*lp)); lp->d_secperunit = cs->sc_size; lp->d_secsize = ccg->ccg_secsize; lp->d_nsectors = ccg->ccg_nsectors; lp->d_ntracks = ccg->ccg_ntracks; lp->d_ncylinders = ccg->ccg_ncylinders; lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); lp->d_type = DTYPE_CCD; strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); lp->d_rpm = 3600; lp->d_interleave = 1; lp->d_flags = 0; lp->d_partitions[RAW_PART].p_offset = 0; lp->d_partitions[RAW_PART].p_size = cs->sc_size; lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; lp->d_npartitions = RAW_PART + 1; lp->d_bbsize = BBSIZE; /* XXX */ lp->d_sbsize = SBSIZE; /* XXX */ lp->d_magic = DISKMAGIC; lp->d_magic2 = DISKMAGIC; lp->d_checksum = dkcksum(&cs->sc_label); /* * Call the generic disklabel extraction routine. */ errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); if (errstring != NULL) ccdmakedisklabel(cs); #ifdef DEBUG /* It's actually extremely common to have unlabeled ccds. */ if (ccddebug & CCDB_LABEL) if (errstring != NULL) printf("ccd%d: %s\n", unit, errstring); #endif } /* * Take care of things one might want to take care of in the event * that a disklabel isn't present. */ static void ccdmakedisklabel(cs) struct ccd_softc *cs; { struct disklabel *lp = &cs->sc_label; /* * For historical reasons, if there's no disklabel present * the raw partition must be marked FS_BSDFFS. */ lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); } /* * Wait interruptibly for an exclusive lock. * * XXX * Several drivers do this; it should be abstracted and made MP-safe. 
*/ static int ccdlock(cs) struct ccd_softc *cs; { int error; while ((cs->sc_flags & CCDF_LOCKED) != 0) { cs->sc_flags |= CCDF_WANTED; if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) return (error); } cs->sc_flags |= CCDF_LOCKED; return (0); } /* * Unlock and wake up any waiters. */ static void ccdunlock(cs) struct ccd_softc *cs; { cs->sc_flags &= ~CCDF_LOCKED; if ((cs->sc_flags & CCDF_WANTED) != 0) { cs->sc_flags &= ~CCDF_WANTED; wakeup(cs); } } #ifdef DEBUG static void printiinfo(ii) struct ccdiinfo *ii; { int ix, i; for (ix = 0; ii->ii_ndisk; ix++, ii++) { printf(" itab[%d]: #dk %d sblk %d soff %d", ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); for (i = 0; i < ii->ii_ndisk; i++) printf(" %d", ii->ii_index[i]); printf("\n"); } } #endif Index: head/sys/dev/vn/vn.c =================================================================== --- head/sys/dev/vn/vn.c (revision 71698) +++ head/sys/dev/vn/vn.c (revision 71699) @@ -1,808 +1,794 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: vn.c 1.13 94/04/02 * * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 * $FreeBSD$ */ /* * Vnode disk driver. * * Block/character interface to a vnode. Allows one to treat a file * as a disk (e.g. build a filesystem in it, mount it, etc.). * * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode * instead of a simple VOP_RDWR. We do this to avoid distorting the * local buffer cache. * * NOTE 2: There is a security issue involved with this driver. * Once mounted all access to the contents of the "mapped" file via * the special file is controlled by the permissions on the special * file, the protection of the mapped file is ignored (effectively, * by using root credentials in all transactions). * * NOTE 3: Doesn't interact with leases, should it? 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static d_ioctl_t vnioctl, vnctlioctl; static d_open_t vnopen, vnctlopen; static d_close_t vnclose, vnctlclose; static d_psize_t vnsize; static d_strategy_t vnstrategy; #define CDEV_MAJOR 43 #define VN_BSIZE_BEST 8192 /* * cdevsw * D_DISK we want to look like a disk * D_CANFREE We support BIO_DELETE */ static struct cdevsw vn_cdevsw = { /* open */ vnopen, /* close */ vnclose, /* read */ physread, /* write */ physwrite, /* ioctl */ vnioctl, /* poll */ nopoll, /* mmap */ nommap, /* strategy */ vnstrategy, /* name */ "vn", /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ vnsize, /* flags */ D_DISK | D_CANFREE | D_MEMDISK }; static struct cdevsw vnctl_cdevsw = { /* open */ vnctlopen, /* close */ vnctlclose, /* read */ noread, /* write */ nowrite, /* ioctl */ vnctlioctl, /* poll */ nopoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ "vn", /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ 0 }; static void vn_clone (void *arg, char *name, int namelen, dev_t *dev) { int i, u; char *np; if (*dev != NODEV) return; i = dev_stdclone(name, &np, "vn", &u); if (i != 2) return; if (u > DKMAXUNIT) return; if (strcmp(np, ".ctl")) return; *dev = make_dev(&vnctl_cdevsw, dkmakeminor(u, 0, 0) | 0x02000000, UID_ROOT, GID_WHEEL, 0600, name); return; } #define getvnbuf() \ ((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK)) #define putvnbuf(bp) \ free((caddr_t)(bp), M_DEVBUF) struct vn_softc { int sc_unit; int sc_flags; /* flags */ int sc_size; /* size of vn, sc_secsize scale */ int sc_secsize; /* sector size */ struct diskslices *sc_slices; struct vnode *sc_vp; /* vnode if not NULL */ vm_object_t sc_object; /* backing object if not NULL */ struct ucred *sc_cred; /* credentials */ int sc_maxactive; /* max # of active requests */ 
u_long sc_options; /* options */ SLIST_ENTRY(vn_softc) sc_list; }; static SLIST_HEAD(, vn_softc) vn_list; /* sc_flags */ #define VNF_INITED 0x01 #define VNF_READONLY 0x02 static u_long vn_options; #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) #define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) static int vnsetcred (struct vn_softc *vn, struct ucred *cred); static void vnclear (struct vn_softc *vn); static int vn_modevent (module_t, int, void *); static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct proc *p); static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct proc *p); static int vnctlclose(dev_t dev, int flags, int mode, struct proc *p) { struct vn_softc *vn = dev->si_drv1; IFOPT(vn, VN_FOLLOW) printf("vnctlclose(%s, 0x%x, 0x%x, %p)\n", devtoname(dev), flags, mode, (void *)p); return (0); } static int vnclose(dev_t dev, int flags, int mode, struct proc *p) { struct vn_softc *vn = dev->si_drv1; IFOPT(vn, VN_FOLLOW) printf("vnclose(%s, 0x%x, 0x%x, %p)\n", devtoname(dev), flags, mode, (void *)p); if (vn->sc_slices != NULL) dsclose(dev, mode, vn->sc_slices); return (0); } static struct vn_softc * vnfindvn(dev_t dev) { int unit; struct vn_softc *vn; unit = dkunit(dev); vn = dev->si_drv1; if (!vn) { SLIST_FOREACH(vn, &vn_list, sc_list) { if (vn->sc_unit == unit) { dev->si_drv1 = vn; break; } } } if (!vn) { vn = malloc(sizeof *vn, M_DEVBUF, M_WAITOK | M_ZERO); if (!vn) return (NULL); vn->sc_unit = unit; dev->si_drv1 = vn; make_dev(&vn_cdevsw, dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART), UID_ROOT, GID_OPERATOR, 0640, "vn%d", unit); SLIST_INSERT_HEAD(&vn_list, vn, sc_list); } return (vn); } static int vnctlopen(dev_t dev, int flags, int mode, struct proc *p) { struct vn_softc *vn; /* * Locate preexisting device */ if ((vn = dev->si_drv1) == NULL) vn = vnfindvn(dev); IFOPT(vn, VN_FOLLOW) printf("vnctlopen(%s, 0x%x, 0x%x, %p)\n", devtoname(dev), flags, mode, 
(void *)p); return(0); } static int vnopen(dev_t dev, int flags, int mode, struct proc *p) { struct vn_softc *vn; /* * Locate preexisting device */ if ((vn = dev->si_drv1) == NULL) vn = vnfindvn(dev); /* * Update si_bsize fields for device. This data will be overriden by * the slice/parition code for vn accesses through partitions, and * used directly if you open the 'whole disk' device. * * si_bsize_best must be reinitialized in case VN has been * reconfigured, plus make it at least VN_BSIZE_BEST for efficiency. */ dev->si_bsize_phys = vn->sc_secsize; dev->si_bsize_best = vn->sc_secsize; if (dev->si_bsize_best < VN_BSIZE_BEST) dev->si_bsize_best = VN_BSIZE_BEST; if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY)) return (EACCES); IFOPT(vn, VN_FOLLOW) printf("vnopen(%s, 0x%x, 0x%x, %p)\n", devtoname(dev), flags, mode, (void *)p); /* * Initialize label */ if (vn->sc_flags & VNF_INITED) { struct disklabel label; /* Build label for whole disk. */ bzero(&label, sizeof label); label.d_secsize = vn->sc_secsize; label.d_nsectors = 32; label.d_ntracks = 64 / (vn->sc_secsize / DEV_BSIZE); label.d_secpercyl = label.d_nsectors * label.d_ntracks; label.d_ncylinders = vn->sc_size / label.d_secpercyl; label.d_secperunit = vn->sc_size; label.d_partitions[RAW_PART].p_size = vn->sc_size; return (dsopen(dev, mode, 0, &vn->sc_slices, &label)); } if (dkslice(dev) != WHOLE_DISK_SLICE || dkpart(dev) != RAW_PART || mode != S_IFCHR) { return (ENXIO); } return(0); } /* * vnstrategy: * * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls * for vnode-backed vn's, and the new vm_pager_strategy() call for * vm_object-backed vn's. * * NOTE: bp->b_blkno is DEV_BSIZE'd. 
We must generate bp->b_pblkno for * our uio or vn_pager_strategy() call that is vn->sc_secsize'd */ static void vnstrategy(struct bio *bp) { int unit; struct vn_softc *vn; int error; unit = dkunit(bp->bio_dev); vn = bp->bio_dev->si_drv1; if (vn == NULL) vn = vnfindvn(bp->bio_dev); IFOPT(vn, VN_DEBUG) printf("vnstrategy(%p): unit %d\n", bp, unit); if ((vn->sc_flags & VNF_INITED) == 0) { bp->bio_error = ENXIO; bp->bio_flags |= BIO_ERROR; biodone(bp); return; } bp->bio_resid = bp->bio_bcount; if (vn->sc_slices != NULL && dscheck(bp, vn->sc_slices) <= 0) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; biodone(bp); return; } if (vn->sc_vp && (bp->bio_cmd == BIO_DELETE)) { /* * Not handled for vnode-backed element yet. */ biodone(bp); } else if (vn->sc_vp) { /* * VNODE I/O * * If an error occurs, we set BIO_ERROR but we do not set * B_INVAL because (for a write anyway), the buffer is * still valid. */ struct uio auio; struct iovec aiov; struct mount *mp; bzero(&auio, sizeof(auio)); aiov.iov_base = bp->bio_data; aiov.iov_len = bp->bio_bcount; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = (vm_ooffset_t)bp->bio_pblkno * vn->sc_secsize; auio.uio_segflg = UIO_SYSSPACE; if(bp->bio_cmd == BIO_READ) auio.uio_rw = UIO_READ; else auio.uio_rw = UIO_WRITE; auio.uio_resid = bp->bio_bcount; auio.uio_procp = curproc; if (VOP_ISLOCKED(vn->sc_vp, NULL)) vprint("unexpected vn driver lock", vn->sc_vp); if (bp->bio_cmd == BIO_READ) { vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc); error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); } else { (void) vn_start_write(vn->sc_vp, &mp, V_WAIT); vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc); error = VOP_WRITE(vn->sc_vp, &auio, 0, vn->sc_cred); vn_finished_write(mp); } VOP_UNLOCK(vn->sc_vp, 0, curproc); bp->bio_resid = auio.uio_resid; if (error) { bp->bio_error = error; bp->bio_flags |= BIO_ERROR; } biodone(bp); } else if (vn->sc_object) { /* * OBJT_SWAP I/O * * ( handles read, write, freebuf ) * * Note: if we 
pre-reserved swap, BIO_DELETE is disabled */ #if 0 KASSERT((bp->b_bufsize & (vn->sc_secsize - 1)) == 0, ("vnstrategy: buffer %p too small for physio", bp)); #endif if ((bp->bio_cmd == BIO_DELETE) && TESTOPT(vn, VN_RESERVE)) { biodone(bp); } else { vm_pager_strategy(vn->sc_object, bp); } } else { bp->bio_flags |= BIO_ERROR; bp->bio_error = EINVAL; biodone(bp); } } /* ARGSUSED */ static int vnctlioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { struct vn_softc *vn; struct vn_ioctl *vio; int error; u_long *f; vn = dev->si_drv1; IFOPT(vn,VN_FOLLOW) printf("vnctlioctl(%s, 0x%lx, %p, 0x%x, %p): unit %d\n", devtoname(dev), cmd, (void *)data, flag, (void *)p, dkunit(dev)); error = suser(p); if (error) return (error); vio = (struct vn_ioctl *)data; f = (u_long*)data; switch (cmd) { case VNIOCATTACH: if (vn->sc_flags & VNF_INITED) return(EBUSY); if (vio->vn_file == NULL) error = vniocattach_swap(vn, vio, dev, flag, p); else error = vniocattach_file(vn, vio, dev, flag, p); break; case VNIOCDETACH: if ((vn->sc_flags & VNF_INITED) == 0) return(ENXIO); /* * XXX handle i/o in progress. Return EBUSY, or wait, or * flush the i/o. * XXX handle multiple opens of the device. Return EBUSY, * or revoke the fd's. * How are these problems handled for removable and failing * hardware devices? 
(Hint: They are not) */ vnclear(vn); IFOPT(vn, VN_FOLLOW) printf("vnioctl: CLRed\n"); break; case VNIOCGSET: vn_options |= *f; *f = vn_options; break; case VNIOCGCLEAR: vn_options &= ~(*f); *f = vn_options; break; case VNIOCUSET: vn->sc_options |= *f; *f = vn->sc_options; break; case VNIOCUCLEAR: vn->sc_options &= ~(*f); *f = vn->sc_options; break; default: error = ENOTTY; break; } return(error); } /* ARGSUSED */ static int vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { struct vn_softc *vn; vn = dev->si_drv1; IFOPT(vn,VN_FOLLOW) printf("vnioctl(%s, 0x%lx, %p, 0x%x, %p): unit %d\n", devtoname(dev), cmd, (void *)data, flag, (void *)p, dkunit(dev)); switch (cmd) { case VNIOCATTACH: case VNIOCDETACH: case VNIOCGSET: case VNIOCGCLEAR: case VNIOCUSET: case VNIOCUCLEAR: return (vnctlioctl(dev, cmd, data, flag, p)); } if (vn->sc_slices != NULL) return(dsioctl(dev, cmd, data, flag, &vn->sc_slices)); return (ENOIOCTL); } /* * vniocattach_file: * * Attach a file to a VN partition. Return the size in the vn_size * field. 
*/ static int vniocattach_file(vn, vio, dev, flag, p) struct vn_softc *vn; struct vn_ioctl *vio; dev_t dev; int flag; struct proc *p; { struct vattr vattr; - struct ucred *uc; struct nameidata nd; int error, flags; flags = FREAD|FWRITE; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); error = vn_open(&nd, &flags, 0); if (error) { if (error != EACCES && error != EPERM && error != EROFS) return (error); flags &= ~FWRITE; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); error = vn_open(&nd, &flags, 0); if (error) return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); if (nd.ni_vp->v_type != VREG || - (error = VOP_GETATTR(nd.ni_vp, &vattr, uc, p))) { + (error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p))) { VOP_UNLOCK(nd.ni_vp, 0, p); - (void) vn_close(nd.ni_vp, flags, uc, p); - crfree(uc); + (void) vn_close(nd.ni_vp, flags, p->p_ucred, p); return (error ? error : EINVAL); } VOP_UNLOCK(nd.ni_vp, 0, p); vn->sc_secsize = DEV_BSIZE; vn->sc_vp = nd.ni_vp; /* * If the size is specified, override the file attributes. Note that * the vn_size argument is in PAGE_SIZE sized blocks. */ if (vio->vn_size) vn->sc_size = (quad_t)vio->vn_size * PAGE_SIZE / vn->sc_secsize; else vn->sc_size = vattr.va_size / vn->sc_secsize; - error = vnsetcred(vn, uc); + error = vnsetcred(vn, p->p_ucred); if (error) { - (void) vn_close(nd.ni_vp, flags, uc, p); - crfree(uc); + (void) vn_close(nd.ni_vp, flags, p->p_ucred, p); return(error); } - crfree(uc); vn->sc_flags |= VNF_INITED; if (flags == FREAD) vn->sc_flags |= VNF_READONLY; IFOPT(vn, VN_FOLLOW) printf("vnioctl: SET vp %p size %x blks\n", vn->sc_vp, vn->sc_size); return(0); } /* * vniocattach_swap: * * Attach swap backing store to a VN partition of the size specified * in vn_size. */ static int vniocattach_swap(vn, vio, dev, flag, p) struct vn_softc *vn; struct vn_ioctl *vio; dev_t dev; int flag; struct proc *p; { int error; - struct ucred *uc; /* * Range check. 
Disallow negative sizes or any size less then the * size of a page. Then round to a page. */ if (vio->vn_size <= 0) return(EDOM); /* * Allocate an OBJT_SWAP object. * * sc_secsize is PAGE_SIZE'd * * vio->vn_size is in PAGE_SIZE'd chunks. * sc_size must be in PAGE_SIZE'd chunks. * Note the truncation. */ vn->sc_secsize = PAGE_SIZE; vn->sc_size = vio->vn_size; vn->sc_object = vm_pager_allocate(OBJT_SWAP, NULL, vn->sc_secsize * (vm_ooffset_t)vio->vn_size, VM_PROT_DEFAULT, 0); IFOPT(vn, VN_RESERVE) { if (swap_pager_reserve(vn->sc_object, 0, vn->sc_size) < 0) { vm_pager_deallocate(vn->sc_object); vn->sc_object = NULL; return(EDOM); } } vn->sc_flags |= VNF_INITED; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = vnsetcred(vn, uc); - crfree(uc); + error = vnsetcred(vn, p->p_ucred); if (error == 0) { IFOPT(vn, VN_FOLLOW) { printf("vnioctl: SET vp %p size %x\n", vn->sc_vp, vn->sc_size); } } if (error) vnclear(vn); return(error); } /* * Duplicate the current processes' credentials. Since we are called only * as the result of a SET ioctl and only root can do that, any future access * to this "disk" is essentially as root. Note that credentials may change * if some other uid can write directly to the mapped file (NFS). */ int vnsetcred(struct vn_softc *vn, struct ucred *cred) { char *tmpbuf; int error = 0; /* * Set credits in our softc */ if (vn->sc_cred) crfree(vn->sc_cred); vn->sc_cred = crdup(cred); /* * Horrible kludge to establish credentials for NFS XXX. 
*/ if (vn->sc_vp) { struct uio auio; struct iovec aiov; tmpbuf = malloc(vn->sc_secsize, M_TEMP, M_WAITOK); bzero(&auio, sizeof(auio)); aiov.iov_base = tmpbuf; aiov.iov_len = vn->sc_secsize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = aiov.iov_len; vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc); error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); VOP_UNLOCK(vn->sc_vp, 0, curproc); free(tmpbuf, M_TEMP); } return (error); } void vnclear(struct vn_softc *vn) { struct proc *p = curproc; /* XXX */ IFOPT(vn, VN_FOLLOW) printf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); if (vn->sc_slices != NULL) dsgone(&vn->sc_slices); vn->sc_flags &= ~VNF_INITED; if (vn->sc_vp != NULL) { (void)vn_close(vn->sc_vp, vn->sc_flags & VNF_READONLY ? FREAD : (FREAD|FWRITE), vn->sc_cred, p); vn->sc_vp = NULL; } vn->sc_flags &= ~VNF_READONLY; if (vn->sc_cred) { crfree(vn->sc_cred); vn->sc_cred = NULL; } if (vn->sc_object != NULL) { vm_pager_deallocate(vn->sc_object); vn->sc_object = NULL; } vn->sc_size = 0; } static int vnsize(dev_t dev) { struct vn_softc *vn; vn = dev->si_drv1; if (!vn) return(-1); if ((vn->sc_flags & VNF_INITED) == 0) return(-1); return(vn->sc_size); } static int vn_modevent(module_t mod, int type, void *data) { struct vn_softc *vn; static eventhandler_tag clonetag; switch (type) { case MOD_LOAD: clonetag = EVENTHANDLER_REGISTER(dev_clone, vn_clone, 0, 1000); cdevsw_add(&vn_cdevsw); break; case MOD_UNLOAD: /* fall through */ case MOD_SHUTDOWN: EVENTHANDLER_DEREGISTER(dev_clone, clonetag); for (;;) { vn = SLIST_FIRST(&vn_list); if (!vn) break; SLIST_REMOVE_HEAD(&vn_list, sc_list); if (vn->sc_flags & VNF_INITED) vnclear(vn); free(vn, M_DEVBUF); } cdevsw_remove(&vn_cdevsw); break; default: break; } return 0; } DEV_MODULE(vn, vn_modevent, 0); Index: head/sys/fs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/fs/cd9660/cd9660_vfsops.c 
(revision 71698) +++ head/sys/fs/cd9660/cd9660_vfsops.c (revision 71699) @@ -1,914 +1,901 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct vnode **)); static int cd9660_checkexp __P((struct mount *, struct sockaddr *, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, vfs_stdstart, cd9660_unmount, cd9660_root, vfs_stdquotactl, cd9660_statfs, vfs_stdsync, cd9660_vget, cd9660_fhtovp, cd9660_checkexp, cd9660_vptofh, cd9660_init, cd9660_uninit, vfs_stdextattrctl, }; VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. 
*/ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. */ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct cdevsw *bd; d_ioctl_t *ioctlp; bd = devsw(dev); ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) != 0) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) != 0) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot __P((struct mount *mp, struct proc *p)); static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp\n"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. 
* * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; - struct ucred *uc; size_t size; int error; mode_t accessmode; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (!vn_isdisk(devvp, &error)) { vrele(devvp); return (error); } /* * Verify that user has necessary permissions on the device, * or has superuser abilities */ accessmode = VREAD; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_ACCESS(devvp, accessmode, uc, p); - crfree(uc); + error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) error = suser(p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; struct buf *pribp = NULL, *supbp = NULL; - struct ucred *uc; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int iso_bsize; int iso_blknum; int joliet_level; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri = NULL; struct iso_sierra_primary_descriptor *pri_sierra = NULL; struct iso_supplementary_descriptor *sup = NULL; struct iso_directory_record *rootp; int logical_block_size; if (!(mp->mnt_flag & MNT_RDONLY)) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); - crfree(uc); - if (error) + if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(devvp, FREAD, FSCRED, p); VOP_UNLOCK(devvp, 0, p); if (error) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. 
*/ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; joliet_level = 0; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if ((error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) != 0) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } switch (isonum_711 (high_sierra? vdp->type_sierra: vdp->type)){ case ISO_VD_PRIMARY: if (pribp == NULL) { pribp = bp; bp = NULL; pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; } break; case ISO_VD_SUPPLEMENTARY: if (supbp == NULL) { supbp = bp; bp = NULL; sup = (struct iso_supplementary_descriptor *)vdp; if (!(argp->flags & ISOFSMNT_NOJOLIET)) { if (bcmp(sup->escape, "%/@", 3) == 0) joliet_level = 1; if (bcmp(sup->escape, "%/C", 3) == 0) joliet_level = 2; if (bcmp(sup->escape, "%/E", 3) == 0) joliet_level = 3; if (isonum_711 (sup->flags) & 1) joliet_level = 0; } } break; case ISO_VD_END: goto vd_end; default: break; } if (bp) { brelse(bp); bp = NULL; } } vd_end: if (bp) { brelse(bp); bp = NULL; } if (pri == NULL) { error = EINVAL; goto out; } logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK | M_ZERO); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? 
pri_sierra->volume_space_size: pri->volume_space_size); isomp->joliet_level = 0; /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them into the space consider- * ations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = ffs(logical_block_size) - 1; pribp->b_flags |= B_AGE; brelse(pribp); pribp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; devvp->v_rdev->si_mountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if ((error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) != 0) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so... 
*/ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags & (ISOFSMNT_NORRIP | ISOFSMNT_GENS | ISOFSMNT_EXTATT | ISOFSMNT_NOJOLIET); if (high_sierra) { /* this effectively ignores all the mount flags */ log(LOG_INFO, "cd9660: High Sierra Format\n"); isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; } else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: log(LOG_INFO, "cd9660: RockRidge Extension\n"); isomp->iso_ftype = ISO_FTYPE_RRIP; break; } /* Decide whether to use the Joliet descriptor */ if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) { log(LOG_INFO, "cd9660: Joliet Extension\n"); rootp = (struct iso_directory_record *) sup->root_directory_record; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->joliet_level = joliet_level; supbp->b_flags |= B_AGE; } if (supbp) { brelse(supbp); supbp = NULL; } return 0; out: devvp->v_rdev->si_mountpoint = NULL; if (bp) brelse(bp); if (pribp) brelse(pribp); if (supbp) brelse(supbp); if (needclose) (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); isomp->im_devvp->v_rdev->si_mountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ 
static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Get file system statistics. */ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } return 0; } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, vpp) register struct mount *mp; struct fid *fhp; struct vnode **vpp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; return (0); } int 
cd9660_checkexp(mp, nam, exflagsp, credanonp) struct mount *mp; struct sockaddr *nam; int *exflagsp; struct ucred **credanonp; { register struct netcred *np; register struct iso_mnt *imp; imp = VFSTOISOFS(mp); /* * Get the export permission structure for this tuple. */ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK | M_ZERO); lockinit(&vp->v_lock, PINOD, "isonode", 0, 0); /* * ISOFS uses stdlock and can share lock structure */ vp->v_vnlock = &vp->v_lock; vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. 
*/ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. 
*/ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: vp->v_op = cd9660_specop_p; vp = addaliasu(vp, ip->inode.iso_rdev); ip->i_vnode = vp; break; default: break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? 
*/ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/fs/coda/coda_vfsops.c =================================================================== --- head/sys/fs/coda/coda_vfsops.c (revision 71698) +++ head/sys/fs/coda/coda_vfsops.c (revision 71699) @@ -1,552 +1,540 @@ /* * * Coda: an Experimental Distributed File System * Release 3.1 * * Copyright (c) 1987-1998 Carnegie Mellon University * All Rights Reserved * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation, and * that credit is given to Carnegie Mellon University in all documents * and publicity pertaining to direct or indirect use of this code or its * derivatives. * * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF * ANY DERIVATIVE WORK. * * Carnegie Mellon encourages users of this software to return any * improvements or extensions that they make, and to grant Carnegie * Mellon the rights to redistribute these changes without encumbrance. 
* * @(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ * $FreeBSD$ * */ /* * Mach Operating System * Copyright (c) 1989 Carnegie-Mellon University * All rights reserved. The CMU software License Agreement specifies * the terms and conditions for use and redistribution. */ /* * This code was written for the Coda file system at Carnegie Mellon * University. Contributers include David Steere, James Kistler, and * M. Satyanarayanan. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_CODA, "CODA storage", "Various Coda Structures"); int codadebug = 0; int coda_vfsop_print_entry = 0; #define ENTRY if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__FUNCTION__)) struct vnode *coda_ctlvp; struct coda_mntinfo coda_mnttbl[NVCODA]; /* indexed by minor device number */ /* structure to keep statistics of internally generated/satisfied calls */ struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE]; #define MARK_ENTRY(op) (coda_vfsopstats[op].entries++) #define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++) #define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++) #define MRAK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++) extern int coda_nc_initialized; /* Set if cache has been initialized */ extern int vc_nb_open __P((dev_t, int, int, struct proc *)); int coda_vfsopstats_init(void) { register int i; for (i=0;ini_vp; if (error) { MARK_INT_FAIL(CODA_MOUNT_STATS); return (error); } if (dvp->v_type != VCHR) { MARK_INT_FAIL(CODA_MOUNT_STATS); vrele(dvp); NDFREE(ndp, NDF_ONLY_PNBUF); return(ENXIO); } dev = dvp->v_rdev; vrele(dvp); NDFREE(ndp, NDF_ONLY_PNBUF); /* * See if the device table matches our expectations. 
*/ if (devsw(dev)->d_open != vc_nb_open) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } if (minor(dev) >= NVCODA || minor(dev) < 0) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } /* * Initialize the mount record and link it to the vfs struct */ mi = &coda_mnttbl[minor(dev)]; if (!VC_OPEN(&mi->mi_vcomm)) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENODEV); } /* No initialization (here) of mi_vcomm! */ vfsp->mnt_data = (qaddr_t)mi; vfs_getnewfsid (vfsp); mi->mi_vfsp = vfsp; /* * Make a root vnode to placate the Vnode interface, but don't * actually make the CODA_ROOT call to venus until the first call * to coda_root in case a server is down while venus is starting. */ rootfid.Volume = 0; rootfid.Vnode = 0; rootfid.Unique = 0; cp = make_coda_node(&rootfid, vfsp, VDIR); rootvp = CTOV(cp); rootvp->v_flag |= VROOT; ctlfid.Volume = CTL_VOL; ctlfid.Vnode = CTL_VNO; ctlfid.Unique = CTL_UNI; /* cp = make_coda_node(&ctlfid, vfsp, VCHR); The above code seems to cause a loop in the cnode links. I don't totally understand when it happens, it is caught when closing down the system. */ cp = make_coda_node(&ctlfid, 0, VCHR); coda_ctlvp = CTOV(cp); /* Add vfs and rootvp to chain of vfs hanging off mntinfo */ mi->mi_vfsp = vfsp; mi->mi_rootvp = rootvp; /* set filesystem block size */ vfsp->mnt_stat.f_bsize = 8192; /* XXX -JJK */ /* Set f_iosize. XXX -- inamura@isl.ntt.co.jp. For vnode_pager_haspage() references. The value should be obtained from underlying UFS. */ /* Checked UFS. iosize is set as 8192 */ vfsp->mnt_stat.f_iosize = 8192; /* error is currently guaranteed to be zero, but in case some code changes... 
*/ CODADEBUG(1, myprintf(("coda_mount returned %d\n",error));); if (error) MARK_INT_FAIL(CODA_MOUNT_STATS); else MARK_INT_SAT(CODA_MOUNT_STATS); return(error); } int coda_unmount(vfsp, mntflags, p) struct mount *vfsp; int mntflags; struct proc *p; { struct coda_mntinfo *mi = vftomi(vfsp); int active, error = 0; ENTRY; MARK_ENTRY(CODA_UMOUNT_STATS); if (!CODA_MOUNTED(vfsp)) { MARK_INT_FAIL(CODA_UMOUNT_STATS); return(EINVAL); } if (mi->mi_vfsp == vfsp) { /* We found the victim */ if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp))) return (EBUSY); /* Venus is still running */ #ifdef DEBUG printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp)); #endif vrele(mi->mi_rootvp); active = coda_kill(vfsp, NOT_DOWNCALL); mi->mi_rootvp->v_flag &= ~VROOT; error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE); printf("coda_unmount: active = %d, vflush active %d\n", active, error); error = 0; /* I'm going to take this out to allow lookups to go through. I'm * not sure it's important anyway. -- DCS 2/2/94 */ /* vfsp->VFS_DATA = NULL; */ /* No more vfsp's to hold onto */ mi->mi_vfsp = NULL; mi->mi_rootvp = NULL; if (error) MARK_INT_FAIL(CODA_UMOUNT_STATS); else MARK_INT_SAT(CODA_UMOUNT_STATS); return(error); } return (EINVAL); } /* * find root of cfs */ int coda_root(vfsp, vpp) struct mount *vfsp; struct vnode **vpp; { struct coda_mntinfo *mi = vftomi(vfsp); struct vnode **result; int error; struct proc *p = curproc; /* XXX - bnoble */ ViceFid VFid; - struct ucred* uc; ENTRY; MARK_ENTRY(CODA_ROOT_STATS); result = NULL; if (vfsp == mi->mi_vfsp) { if ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) || (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) || (VTOC(mi->mi_rootvp)->c_fid.Unique != 0)) { /* Found valid root. */ *vpp = mi->mi_rootvp; /* On Mach, this is vref. 
On NetBSD, VOP_LOCK */ #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); return(0); } } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = venus_root(vftomi(vfsp), uc, p, &VFid); - crfree(uc); + error = venus_root(vftomi(vfsp), p->p_ucred, p, &VFid); if (!error) { /* * Save the new rootfid in the cnode, and rehash the cnode into the * cnode hash with the new fid key. */ coda_unsave(VTOC(mi->mi_rootvp)); VTOC(mi->mi_rootvp)->c_fid = VFid; coda_save(VTOC(mi->mi_rootvp)); *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); goto exit; } else if (error == ENODEV || error == EINTR) { /* Gross hack here! */ /* * If Venus fails to respond to the CODA_ROOT call, coda_call returns * ENODEV. Return the uninitialized root vnode to allow vfs * operations such as unmount to continue. Without this hack, * there is no way to do an unmount if Venus dies before a * successful CODA_ROOT call is done. All vnode operations * will fail. */ *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_FAIL(CODA_ROOT_STATS); error = 0; goto exit; } else { CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); ); MARK_INT_FAIL(CODA_ROOT_STATS); goto exit; } exit: return(error); } /* * Get file system statistics. */ int coda_nb_statfs(vfsp, sbp, p) register struct mount *vfsp; struct statfs *sbp; struct proc *p; { ENTRY; /* MARK_ENTRY(CODA_STATFS_STATS); */ if (!CODA_MOUNTED(vfsp)) { /* MARK_INT_FAIL(CODA_STATFS_STATS);*/ return(EINVAL); } bzero(sbp, sizeof(struct statfs)); /* XXX - what to do about f_flags, others? 
--bnoble */ /* Below This is what AFS does #define NB_SFS_SIZ 0x895440 */ /* Note: Normal fs's have a bsize of 0x400 == 1024 */ sbp->f_type = vfsp->mnt_vfc->vfc_typenum; sbp->f_bsize = 8192; /* XXX */ sbp->f_iosize = 8192; /* XXX */ #define NB_SFS_SIZ 0x8AB75D sbp->f_blocks = NB_SFS_SIZ; sbp->f_bfree = NB_SFS_SIZ; sbp->f_bavail = NB_SFS_SIZ; sbp->f_files = NB_SFS_SIZ; sbp->f_ffree = NB_SFS_SIZ; bcopy((caddr_t)&(vfsp->mnt_stat.f_fsid), (caddr_t)&(sbp->f_fsid), sizeof (fsid_t)); snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda"); snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA"); /* MARK_INT_SAT(CODA_STATFS_STATS); */ return(0); } /* * Flush any pending I/O. */ int coda_sync(vfsp, waitfor, cred, p) struct mount *vfsp; int waitfor; struct ucred *cred; struct proc *p; { ENTRY; MARK_ENTRY(CODA_SYNC_STATS); MARK_INT_SAT(CODA_SYNC_STATS); return(0); } /* * fhtovp is now what vget used to be in 4.3-derived systems. For * some silly reason, vget is now keyed by a 32 bit ino_t, rather than * a type-specific fid. */ int coda_fhtovp(vfsp, fhp, nam, vpp, exflagsp, creadanonp) register struct mount *vfsp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **creadanonp; { struct cfid *cfid = (struct cfid *)fhp; struct cnode *cp = 0; int error; struct proc *p = curproc; /* XXX -mach */ ViceFid VFid; - struct ucred *uc; int vtype; ENTRY; MARK_ENTRY(CODA_VGET_STATS); /* Check for vget of control object. 
*/ if (IS_CTL_FID(&cfid->cfid_fid)) { *vpp = coda_ctlvp; vref(coda_ctlvp); MARK_INT_SAT(CODA_VGET_STATS); return(0); } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, uc, p, &VFid, &vtype); - crfree(uc); + error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, p->p_ucred, p, &VFid, &vtype); if (error) { CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));) *vpp = (struct vnode *)0; } else { CODADEBUG(CODA_VGET, myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n", VFid.Volume, VFid.Vnode, VFid.Unique, vtype, error)); ) cp = make_coda_node(&VFid, vfsp, vtype); *vpp = CTOV(cp); } return(error); } /* * To allow for greater ease of use, some vnodes may be orphaned when * Venus dies. Certain operations should still be allowed to go * through, but without propagating ophan-ness. So this function will * get a new vnode for the file from the current run of Venus. */ int getNewVnode(vpp) struct vnode **vpp; { struct cfid cfid; struct coda_mntinfo *mi = vftomi((*vpp)->v_mount); ENTRY; cfid.cfid_len = (short)sizeof(ViceFid); cfid.cfid_fid = VTOC(*vpp)->c_fid; /* Structure assignment. */ /* XXX ? */ /* We're guessing that if set, the 1st element on the list is a * valid vnode to use. If not, return ENODEV as venus is dead. */ if (mi->mi_vfsp == NULL) return ENODEV; return coda_fhtovp(mi->mi_vfsp, (struct fid*)&cfid, NULL, vpp, NULL, NULL); } #include #include #include /* get the mount structure corresponding to a given device. Assume * device corresponds to a UFS. Return NULL if no device is found. 
*/ struct mount *devtomp(dev) dev_t dev; { struct mount *mp; TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (((VFSTOUFS(mp))->um_dev == dev)) { /* mount corresponds to UFS and the device matches one we want */ return(mp); } } /* mount structure wasn't found */ return(NULL); } struct vfsops coda_vfsops = { coda_mount, vfs_stdstart, coda_unmount, coda_root, vfs_stdquotactl, coda_nb_statfs, coda_sync, vfs_stdvget, vfs_stdfhtovp, vfs_stdcheckexp, vfs_stdvptofh, vfs_stdinit, vfs_stduninit, vfs_stdextattrctl, }; VFS_SET(coda_vfsops, coda, VFCF_NETWORK); Index: head/sys/fs/hpfs/hpfs_vfsops.c =================================================================== --- head/sys/fs/hpfs/hpfs_vfsops.c (revision 71698) +++ head/sys/fs/hpfs/hpfs_vfsops.c (revision 71699) @@ -1,789 +1,781 @@ /*- * Copyright (c) 1998, 1999 Semen Ustimenko (semenu@FreeBSD.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__NetBSD__) #include #endif #include #include #include #if defined(__NetBSD__) #include #endif #include #include #include #if defined(__FreeBSD__) MALLOC_DEFINE(M_HPFSMNT, "HPFS mount", "HPFS mount structure"); MALLOC_DEFINE(M_HPFSNO, "HPFS node", "HPFS node structure"); #endif static int hpfs_root __P((struct mount *, struct vnode **)); static int hpfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int hpfs_unmount __P((struct mount *, int, struct proc *)); static int hpfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int hpfs_mountfs __P((register struct vnode *, struct mount *, struct hpfs_args *, struct proc *)); static int hpfs_vptofh __P((struct vnode *, struct fid *)); static int hpfs_fhtovp __P((struct mount *, struct fid *, struct vnode **)); #if !defined(__FreeBSD__) static int hpfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int hpfs_start __P((struct mount *, int, struct proc *)); static int hpfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); #endif #if defined(__FreeBSD__) struct sockaddr; static int hpfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int hpfs_init __P((struct vfsconf *)); static int hpfs_uninit __P((struct vfsconf *)); static int hpfs_checkexp 
__P((struct mount *, struct sockaddr *, int *, struct ucred **)); #else /* defined(__NetBSD__) */ static int hpfs_mount __P((struct mount *, const char *, void *, struct nameidata *, struct proc *)); static void hpfs_init __P((void)); static int hpfs_mountroot __P((void)); static int hpfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); static int hpfs_checkexp __P((struct mount *, struct mbuf *, int *, struct ucred **)); #endif /*ARGSUSED*/ static int hpfs_checkexp(mp, nam, exflagsp, credanonp) #if defined(__FreeBSD__) register struct mount *mp; struct sockaddr *nam; int *exflagsp; struct ucred **credanonp; #else /* defined(__NetBSD__) */ register struct mount *mp; struct mbuf *nam; int *exflagsp; struct ucred **credanonp; #endif { register struct netcred *np; register struct hpfsmount *hpm = VFSTOHPFS(mp); /* * Get the export permission structure for this tuple. */ np = vfs_export_lookup(mp, &hpm->hpm_export, nam); if (np == NULL) return (EACCES); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } #if !defined(__FreeBSD__) /*ARGSUSED*/ static int hpfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) int *name; u_int namelen; void *oldp; size_t *oldlenp; void *newp; size_t newlen; struct proc *p; { return (EINVAL); } static int hpfs_mountroot() { return (EINVAL); } #endif #if defined(__FreeBSD__) static int hpfs_init ( struct vfsconf *vcp ) #else /* defined(__NetBSD__) */ static void hpfs_init () #endif { dprintf(("hpfs_init():\n")); hpfs_hphashinit(); #if defined(__FreeBSD__) return 0; #endif } #if defined(__FreeBSD__) static int hpfs_uninit (vfsp) struct vfsconf *vfsp; { hpfs_hphashdestroy(); return 0;; } #endif static int hpfs_mount ( struct mount *mp, #if defined(__FreeBSD__) char *path, caddr_t data, #else /* defined(__NetBSD__) */ const char *path, void *data, #endif struct nameidata *ndp, struct proc *p ) { u_int size; int err = 0; struct vnode *devvp; struct hpfs_args args; struct hpfsmount *hpmp = 0; 
dprintf(("hpfs_mount():\n")); /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct hpfs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { dprintf(("hpfs_mount: MNT_UPDATE: ")); hpmp = VFSTOHPFS(mp); if (args.fspec == 0) { dprintf(("export 0x%x\n",args.export.ex_flags)); err = vfs_export(mp, &hpmp->hpm_export, &args.export); if (err) { printf("hpfs_mount: vfs_export failed %d\n", err); } goto success; } else { dprintf(("name [FAILED]\n")); err = EINVAL; goto success; } dprintf(("\n")); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; #if defined(__FreeBSD__) if (!vn_isdisk(devvp, &err)) goto error_2; #else /* defined(__NetBSD__) */ if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { err = ENXIO; goto error_2; } #endif /* ******************** * NEW MOUNT ******************** */ /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. 
*/ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = hpfs_mountfs(devvp, mp, &args, p); if (err) goto error_2; /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return( err); } /* * Common code for mount and mountroot */ int hpfs_mountfs(devvp, mp, argsp, p) register struct vnode *devvp; struct mount *mp; struct hpfs_args *argsp; struct proc *p; { int error, ncount, ronly; struct sublock *sup; struct spblock *spp; struct hpfsmount *hpmp; struct buf *bp = NULL; struct vnode *vp; -#if defined(__FreeBSD__) - struct ucred *uc; -#endif dev_t dev = devvp->v_rdev; dprintf(("hpfs_mountfs():\n")); /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. 
*/ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); #if defined(__FreeBSD__) if (devvp->v_object) ncount -= 1; #endif if (ncount > 1 && devvp != rootvp) return (EBUSY); #if defined(__FreeBSD__) VN_LOCK(devvp, LK_EXCLUSIVE | LK_RETRY, p); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = vinvalbuf(devvp, V_SAVE, uc, p, 0, 0); - crfree(uc); + error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP__UNLOCK(devvp, 0, p); #else error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); #endif if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; VN_LOCK(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); VOP__UNLOCK(devvp, 0, p); if (error) return (error); /* * Do actual mount */ hpmp = malloc(sizeof(struct hpfsmount), M_HPFSMNT, M_WAITOK | M_ZERO); /* Read in SuperBlock */ error = bread(devvp, SUBLOCK, SUSIZE, NOCRED, &bp); if (error) goto failed; bcopy(bp->b_data, &hpmp->hpm_su, sizeof(struct sublock)); brelse(bp); bp = NULL; /* Read in SpareBlock */ error = bread(devvp, SPBLOCK, SPSIZE, NOCRED, &bp); if (error) goto failed; bcopy(bp->b_data, &hpmp->hpm_sp, sizeof(struct spblock)); brelse(bp); bp = NULL; sup = &hpmp->hpm_su; spp = &hpmp->hpm_sp; /* Check magic */ if (sup->su_magic != SU_MAGIC) { printf("hpfs_mountfs: SuperBlock MAGIC DOESN'T MATCH\n"); error = EINVAL; goto failed; } if (spp->sp_magic != SP_MAGIC) { printf("hpfs_mountfs: SpareBlock MAGIC DOESN'T MATCH\n"); error = EINVAL; goto failed; } mp->mnt_data = (qaddr_t)hpmp; hpmp->hpm_devvp = devvp; hpmp->hpm_dev = devvp->v_rdev; hpmp->hpm_mp = mp; hpmp->hpm_uid = argsp->uid; hpmp->hpm_gid = argsp->gid; hpmp->hpm_mode = argsp->mode; error = hpfs_bminit(hpmp); if (error) goto failed; error = hpfs_cpinit(hpmp, argsp); if (error) { hpfs_bmdeinit(hpmp); goto failed; } error = hpfs_root(mp, &vp); if (error) { hpfs_cpdeinit(hpmp); hpfs_bmdeinit(hpmp); goto failed; } vput(vp); #if 
defined(__FreeBSD__) mp->mnt_stat.f_fsid.val[0] = (long)dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; #else mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_HPFS); #endif mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; devvp->v_rdev->si_mountpoint = mp; return (0); failed: if (bp) brelse (bp); mp->mnt_data = (qaddr_t)NULL; #if defined(__FreeBSD__) devvp->v_rdev->si_mountpoint = NULL; #else devvp->v_specflags &= ~SI_MOUNTEDON; #endif (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); return (error); } #if !defined(__FreeBSD__) static int hpfs_start ( struct mount *mp, int flags, struct proc *p ) { return (0); } #endif static int hpfs_unmount( struct mount *mp, int mntflags, struct proc *p) { int error, flags, ronly; register struct hpfsmount *hpmp = VFSTOHPFS(mp); dprintf(("hpfs_unmount():\n")); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; flags = 0; if(mntflags & MNT_FORCE) flags |= FORCECLOSE; dprintf(("hpfs_unmount: vflushing...\n")); error = vflush(mp,NULLVP,flags); if (error) { printf("hpfs_unmount: vflush failed: %d\n",error); return (error); } #if defined(__FreeBSD__) hpmp->hpm_devvp->v_rdev->si_mountpoint = NULL; #else hpmp->hpm_devvp->v_specflags &= ~SI_MOUNTEDON; #endif vinvalbuf(hpmp->hpm_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(hpmp->hpm_devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); vrele(hpmp->hpm_devvp); dprintf(("hpfs_umount: freeing memory...\n")); hpfs_cpdeinit(hpmp); hpfs_bmdeinit(hpmp); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; FREE(hpmp, M_HPFSMNT); return (0); } static int hpfs_root( struct mount *mp, struct vnode **vpp ) { int error = 0; struct hpfsmount *hpmp = VFSTOHPFS(mp); dprintf(("hpfs_root():\n")); error = VFS_VGET(mp, (ino_t)hpmp->hpm_su.su_rootfno, vpp); if(error) { printf("hpfs_root: VFS_VGET failed: %d\n",error); return (error); } return (error); } static int hpfs_statfs( struct mount *mp, struct statfs *sbp, struct proc *p) { struct hpfsmount *hpmp = VFSTOHPFS(mp); dprintf(("hpfs_statfs(): HPFS%d.%d\n", hpmp->hpm_su.su_hpfsver, hpmp->hpm_su.su_fnctver)); #if defined(__FreeBSD__) sbp->f_type = mp->mnt_vfc->vfc_typenum; #else /* defined(__NetBSD__) */ sbp->f_type = 0; #endif sbp->f_bsize = DEV_BSIZE; sbp->f_iosize = DEV_BSIZE; sbp->f_blocks = hpmp->hpm_su.su_btotal; sbp->f_bfree = sbp->f_bavail = hpmp->hpm_bavail; sbp->f_ffree = 0; sbp->f_files = 0; if (sbp != &mp->mnt_stat) { bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } sbp->f_flags = mp->mnt_flag; return (0); } #if !defined(__FreeBSD__) static int hpfs_sync ( struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) { return (0); } static int hpfs_quotactl ( struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p) { printf("hpfs_quotactl():\n"); return (EOPNOTSUPP); } #endif /*ARGSUSED*/ static int hpfs_fhtovp( struct mount *mp, struct fid *fhp, struct vnode **vpp) { struct vnode *nvp; struct hpfid *hpfhp = (struct hpfid *)fhp; int error; if ((error = VFS_VGET(mp, hpfhp->hpfid_ino, &nvp)) != 0) { *vpp = NULLVP; return (error); } /* XXX as unlink/rmdir/mkdir/creat are not currently possible * with HPFS, we don't need to check anything else for now */ *vpp = nvp; return (0); } static 
int hpfs_vptofh( struct vnode *vp, struct fid *fhp) { register struct hpfsnode *hpp; register struct hpfid *hpfhp; hpp = VTOHP(vp); hpfhp = (struct hpfid *)fhp; hpfhp->hpfid_len = sizeof(struct hpfid); hpfhp->hpfid_ino = hpp->h_no; /* hpfhp->hpfid_gen = hpp->h_gen; */ return (0); } static int hpfs_vget( struct mount *mp, ino_t ino, struct vnode **vpp) { struct hpfsmount *hpmp = VFSTOHPFS(mp); struct vnode *vp; struct hpfsnode *hp; struct buf *bp; struct proc *p = curproc; /* XXX */ int error; dprintf(("hpfs_vget(0x%x): ",ino)); *vpp = NULL; hp = NULL; vp = NULL; if ((*vpp = hpfs_hphashvget(hpmp->hpm_dev, ino, p)) != NULL) { dprintf(("hashed\n")); return (0); } /* * We have to lock node creation for a while, * but then we have to call getnewvnode(), * this may cause hpfs_reclaim() to be called, * this may need to VOP_VGET() parent dir for * update reasons, and if parent is not in * hash, we have to lock node creation... * To solve this, we MALLOC, getnewvnode and init while * not locked (probability of node appearence * at that time is little, and anyway - we'll * check for it). 
*/ MALLOC(hp, struct hpfsnode *, sizeof(struct hpfsnode), M_HPFSNO, M_WAITOK); error = getnewvnode(VT_HPFS, hpmp->hpm_mp, hpfs_vnodeop_p, &vp); if (error) { printf("hpfs_vget: can't get new vnode\n"); FREE(hp, M_HPFSNO); return (error); } dprintf(("prenew ")); vp->v_data = hp; if (ino == (ino_t)hpmp->hpm_su.su_rootfno) vp->v_flag |= VROOT; mtx_init(&hp->h_interlock, "hpfsnode interlock", MTX_DEF); lockinit(&hp->h_lock, PINOD, "hpnode", 0, 0); hp->h_flag = H_INVAL; hp->h_vp = vp; hp->h_hpmp = hpmp; hp->h_no = ino; hp->h_dev = hpmp->hpm_dev; hp->h_uid = hpmp->hpm_uid; hp->h_gid = hpmp->hpm_uid; hp->h_mode = hpmp->hpm_mode; hp->h_devvp = hpmp->hpm_devvp; VREF(hp->h_devvp); error = VN_LOCK(vp, LK_EXCLUSIVE, p); if (error) { vput(vp); return (error); } do { if ((*vpp = hpfs_hphashvget(hpmp->hpm_dev, ino, p)) != NULL) { dprintf(("hashed2\n")); vput(vp); return (0); } } while(LOCKMGR(&hpfs_hphash_lock,LK_EXCLUSIVE|LK_SLEEPFAIL,NULL,NULL)); hpfs_hphashins(hp); LOCKMGR(&hpfs_hphash_lock, LK_RELEASE, NULL, NULL); error = bread(hpmp->hpm_devvp, ino, FNODESIZE, NOCRED, &bp); if (error) { printf("hpfs_vget: can't read ino %d\n",ino); vput(vp); return (error); } bcopy(bp->b_data, &hp->h_fn, sizeof(struct fnode)); brelse(bp); if (hp->h_fn.fn_magic != FN_MAGIC) { printf("hpfs_vget: MAGIC DOESN'T MATCH\n"); vput(vp); return (EINVAL); } vp->v_type = hp->h_fn.fn_flag ? 
VDIR:VREG; hp->h_flag &= ~H_INVAL; *vpp = vp; return (0); } #if defined(__FreeBSD__) static struct vfsops hpfs_vfsops = { hpfs_mount, vfs_stdstart, hpfs_unmount, hpfs_root, vfs_stdquotactl, hpfs_statfs, vfs_stdsync, hpfs_vget, hpfs_fhtovp, hpfs_checkexp, hpfs_vptofh, hpfs_init, hpfs_uninit, vfs_stdextattrctl, }; VFS_SET(hpfs_vfsops, hpfs, 0); #else /* defined(__NetBSD__) */ extern struct vnodeopv_desc hpfs_vnodeop_opv_desc; struct vnodeopv_desc *hpfs_vnodeopv_descs[] = { &hpfs_vnodeop_opv_desc, NULL, }; struct vfsops hpfs_vfsops = { MOUNT_HPFS, hpfs_mount, hpfs_start, hpfs_unmount, hpfs_root, hpfs_quotactl, hpfs_statfs, hpfs_sync, hpfs_vget, hpfs_fhtovp, hpfs_vptofh, hpfs_init, hpfs_sysctl, hpfs_mountroot, hpfs_checkexp, hpfs_vnodeopv_descs, }; #endif Index: head/sys/geom/geom_ccd.c =================================================================== --- head/sys/geom/geom_ccd.c (revision 71698) +++ head/sys/geom/geom_ccd.c (revision 71699) @@ -1,1755 +1,1727 @@ /* $FreeBSD$ */ /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ /* * Copyright (c) 1995 Jason R. Thorpe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project * by Jason R. Thorpe. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: cd.c 1.6 90/11/28$ * * @(#)cd.c 8.2 (Berkeley) 11/16/93 */ /* * "Concatenated" disk driver. * * Dynamic configuration and disklabel support by: * Jason R. Thorpe * Numerical Aerodynamic Simulation Facility * Mail Stop 258-6 * NASA Ames Research Center * Moffett Field, CA 94035 */ #include "ccd.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(CCDDEBUG) && !defined(DEBUG) #define DEBUG #endif #ifdef DEBUG #define CCDB_FOLLOW 0x01 #define CCDB_INIT 0x02 #define CCDB_IO 0x04 #define CCDB_LABEL 0x08 #define CCDB_VNODE 0x10 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | CCDB_VNODE; SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); #undef DEBUG #endif #define ccdunit(x) dkunit(x) #define ccdpart(x) dkpart(x) /* This is how mirroring works (only writes are special): When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s linked together by the cb_mirror field. "cb_pflags & CCDPF_MIRROR_DONE" is set to 0 on both of them. When a component returns to ccdiodone(), it checks if "cb_pflags & CCDPF_MIRROR_DONE" is set or not. 
If not, it sets the partner's flag and returns. If it is, it means its partner has already returned, so it will go to the regular cleanup. */ struct ccdbuf { struct bio cb_buf; /* new I/O buf */ struct bio *cb_obp; /* ptr. to original I/O buf */ struct ccdbuf *cb_freenext; /* free list link */ int cb_unit; /* target unit */ int cb_comp; /* target component */ int cb_pflags; /* mirror/parity status flag */ struct ccdbuf *cb_mirror; /* mirror counterpart */ }; /* bits in cb_pflags */ #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ #define CCDLABELDEV(dev) \ (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) static d_open_t ccdopen; static d_close_t ccdclose; static d_strategy_t ccdstrategy; static d_ioctl_t ccdioctl; static d_dump_t ccddump; static d_psize_t ccdsize; #define NCCDFREEHIWAT 16 #define CDEV_MAJOR 74 #define BDEV_MAJOR 21 static struct cdevsw ccd_cdevsw = { /* open */ ccdopen, /* close */ ccdclose, /* read */ physread, /* write */ physwrite, /* ioctl */ ccdioctl, /* poll */ nopoll, /* mmap */ nommap, /* strategy */ ccdstrategy, /* name */ "ccd", /* maj */ CDEV_MAJOR, /* dump */ ccddump, /* psize */ ccdsize, /* flags */ D_DISK, /* bmaj */ BDEV_MAJOR }; /* called during module initialization */ static void ccdattach __P((void)); static int ccd_modevent __P((module_t, int, void *)); /* called by biodone() at interrupt time */ static void ccdiodone __P((struct bio *bp)); static void ccdstart __P((struct ccd_softc *, struct bio *)); static void ccdinterleave __P((struct ccd_softc *, int)); static void ccdintr __P((struct ccd_softc *, struct bio *)); static int ccdinit __P((struct ccddevice *, char **, struct proc *)); static int ccdlookup __P((char *, struct proc *p, struct vnode **)); static void ccdbuffer __P((struct ccdbuf **ret, struct ccd_softc *, struct bio *, daddr_t, caddr_t, long)); static void ccdgetdisklabel __P((dev_t)); static void ccdmakedisklabel __P((struct ccd_softc *)); static int ccdlock __P((struct 
ccd_softc *)); static void ccdunlock __P((struct ccd_softc *)); #ifdef DEBUG static void printiinfo __P((struct ccdiinfo *)); #endif /* Non-private for the benefit of libkvm. */ struct ccd_softc *ccd_softc; struct ccddevice *ccddevs; struct ccdbuf *ccdfreebufs; static int numccdfreebufs; static int numccd = 0; /* * getccdbuf() - Allocate and zero a ccd buffer. * * This routine is called at splbio(). */ static __inline struct ccdbuf * getccdbuf(struct ccdbuf *cpy) { struct ccdbuf *cbp; /* * Allocate from freelist or malloc as necessary */ if ((cbp = ccdfreebufs) != NULL) { ccdfreebufs = cbp->cb_freenext; --numccdfreebufs; } else { cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); } /* * Used by mirroring code */ if (cpy) bcopy(cpy, cbp, sizeof(struct ccdbuf)); else bzero(cbp, sizeof(struct ccdbuf)); /* * independant struct bio initialization */ return(cbp); } /* * putccdbuf() - Free a ccd buffer. * * This routine is called at splbio(). */ static __inline void putccdbuf(struct ccdbuf *cbp) { if (numccdfreebufs < NCCDFREEHIWAT) { cbp->cb_freenext = ccdfreebufs; ccdfreebufs = cbp; ++numccdfreebufs; } else { free((caddr_t)cbp, M_DEVBUF); } } /* * Number of blocks to untouched in front of a component partition. * This is to avoid violating its disklabel area when it starts at the * beginning of the slice. */ #if !defined(CCD_OFFSET) #define CCD_OFFSET 16 #endif static void ccd_clone(void *arg, char *name, int namelen, dev_t *dev) { int i, u; char *s; if (*dev != NODEV) return; i = dev_stdclone(name, &s, "ccd", &u); if (i != 2) return; if (u >= numccd) return; if (*s <= 'a' || *s >= 'h') return; if (s[1] != '\0') return; *dev = make_dev(&ccd_cdevsw, u * 8 + *s - 'a', UID_ROOT, GID_OPERATOR, 0640, name); } /* * Called by main() during pseudo-device attachment. All we need * to do is allocate enough space for devices to be configured later, and * add devsw entries. 
*/ static void ccdattach() { int i; int num = NCCD; if (num > 1) printf("ccd0-%d: Concatenated disk drivers\n", num-1); else printf("ccd0: Concatenated disk driver\n"); ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), M_DEVBUF, M_NOWAIT); ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), M_DEVBUF, M_NOWAIT); if ((ccd_softc == NULL) || (ccddevs == NULL)) { printf("WARNING: no memory for concatenated disks\n"); if (ccd_softc != NULL) free(ccd_softc, M_DEVBUF); if (ccddevs != NULL) free(ccddevs, M_DEVBUF); return; } numccd = num; bzero(ccd_softc, num * sizeof(struct ccd_softc)); bzero(ccddevs, num * sizeof(struct ccddevice)); cdevsw_add(&ccd_cdevsw); /* XXX: is this necessary? */ for (i = 0; i < numccd; ++i) ccddevs[i].ccd_dk = -1; EVENTHANDLER_REGISTER(dev_clone, ccd_clone, 0, 1000); } static int ccd_modevent(mod, type, data) module_t mod; int type; void *data; { int error = 0; switch (type) { case MOD_LOAD: ccdattach(); break; case MOD_UNLOAD: printf("ccd0: Unload not supported!\n"); error = EOPNOTSUPP; break; default: /* MOD_SHUTDOWN etc */ break; } return (error); } DEV_MODULE(ccd, ccd_modevent, NULL); static int ccdinit(ccd, cpaths, p) struct ccddevice *ccd; char **cpaths; struct proc *p; { struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; struct ccdcinfo *ci = NULL; /* XXX */ size_t size; int ix; struct vnode *vp; - struct ucred *uc; size_t minsize; int maxsecsize; struct partinfo dpart; struct ccdgeom *ccg = &cs->sc_geom; char tmppath[MAXPATHLEN]; int error = 0; #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccdinit: unit %d\n", ccd->ccd_unit); #endif cs->sc_size = 0; cs->sc_ileave = ccd->ccd_interleave; cs->sc_nccdisks = ccd->ccd_ndev; /* Allocate space for the component info. */ cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), M_DEVBUF, M_WAITOK); /* * Verify that each component piece exists and record * relevant information about it. 
*/ maxsecsize = 0; minsize = 0; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (ix = 0; ix < cs->sc_nccdisks; ix++) { vp = ccd->ccd_vpp[ix]; ci = &cs->sc_cinfo[ix]; ci->ci_vp = vp; /* * Copy in the pathname of the component. */ bzero(tmppath, sizeof(tmppath)); /* sanity */ if ((error = copyinstr(cpaths[ix], tmppath, MAXPATHLEN, &ci->ci_pathlen)) != 0) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: can't copy path, error = %d\n", ccd->ccd_unit, error); #endif goto fail; } ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); bcopy(tmppath, ci->ci_path, ci->ci_pathlen); ci->ci_dev = vn_todev(vp); /* * Get partition information for the component. */ if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, - FREAD, uc, p)) != 0) { + FREAD, p->p_ucred, p)) != 0) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: %s: ioctl failed, error = %d\n", ccd->ccd_unit, ci->ci_path, error); #endif goto fail; } if (dpart.part->p_fstype == FS_BSDFFS) { maxsecsize = ((dpart.disklab->d_secsize > maxsecsize) ? dpart.disklab->d_secsize : maxsecsize); size = dpart.part->p_size - CCD_OFFSET; } else { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: %s: incorrect partition type\n", ccd->ccd_unit, ci->ci_path); #endif error = EFTYPE; goto fail; } /* * Calculate the size, truncating to an interleave * boundary if necessary. */ if (cs->sc_ileave > 1) size -= size % cs->sc_ileave; if (size == 0) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: %s: size == 0\n", ccd->ccd_unit, ci->ci_path); #endif error = ENODEV; goto fail; } if (minsize == 0 || size < minsize) minsize = size; ci->ci_size = size; cs->sc_size += size; } - crfree(uc); /* * Don't allow the interleave to be smaller than * the biggest component sector. 
*/ if ((cs->sc_ileave > 0) && (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { #ifdef DEBUG if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) printf("ccd%d: interleave must be at least %d\n", ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); #endif error = EINVAL; goto fail; } /* * If uniform interleave is desired set all sizes to that of * the smallest component. This will guarentee that a single * interleave table is generated. * * Lost space must be taken into account when calculating the * overall size. Half the space is lost when CCDF_MIRROR is * specified. One disk is lost when CCDF_PARITY is specified. */ if (ccd->ccd_flags & CCDF_UNIFORM) { for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { ci->ci_size = minsize; } if (ccd->ccd_flags & CCDF_MIRROR) { /* * Check to see if an even number of components * have been specified. The interleave must also * be non-zero in order for us to be able to * guarentee the topology. */ if (cs->sc_nccdisks % 2) { printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); error = EINVAL; goto fail; } if (cs->sc_ileave == 0) { printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); error = EINVAL; goto fail; } cs->sc_size = (cs->sc_nccdisks/2) * minsize; } else if (ccd->ccd_flags & CCDF_PARITY) { cs->sc_size = (cs->sc_nccdisks-1) * minsize; } else { if (cs->sc_ileave == 0) { printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); error = EINVAL; goto fail; } cs->sc_size = cs->sc_nccdisks * minsize; } } /* * Construct the interleave table. */ ccdinterleave(cs, ccd->ccd_unit); /* * Create pseudo-geometry based on 1MB cylinders. It's * pretty close. */ ccg->ccg_secsize = maxsecsize; ccg->ccg_ntracks = 1; ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; /* * Add an devstat entry for this device. 
*/ devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, DEVSTAT_PRIORITY_ARRAY); cs->sc_flags |= CCDF_INITED; cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ cs->sc_unit = ccd->ccd_unit; return (0); fail: - crfree(uc); while (ci > cs->sc_cinfo) { ci--; free(ci->ci_path, M_DEVBUF); } free(cs->sc_cinfo, M_DEVBUF); return (error); } static void ccdinterleave(cs, unit) struct ccd_softc *cs; int unit; { struct ccdcinfo *ci, *smallci; struct ccdiinfo *ii; daddr_t bn, lbn; int ix; u_long size; #ifdef DEBUG if (ccddebug & CCDB_INIT) printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); #endif /* * Allocate an interleave table. The worst case occurs when each * of N disks is of a different size, resulting in N interleave * tables. * * Chances are this is too big, but we don't care. */ size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK | M_ZERO); /* * Trivial case: no interleave (actually interleave of disk size). * Each table entry represents a single component in its entirety. * * An interleave of 0 may not be used with a mirror or parity setup. */ if (cs->sc_ileave == 0) { bn = 0; ii = cs->sc_itable; for (ix = 0; ix < cs->sc_nccdisks; ix++) { /* Allocate space for ii_index. */ ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); ii->ii_ndisk = 1; ii->ii_startblk = bn; ii->ii_startoff = 0; ii->ii_index[0] = ix; bn += cs->sc_cinfo[ix].ci_size; ii++; } ii->ii_ndisk = 0; #ifdef DEBUG if (ccddebug & CCDB_INIT) printiinfo(cs->sc_itable); #endif return; } /* * The following isn't fast or pretty; it doesn't have to be. */ size = 0; bn = lbn = 0; for (ii = cs->sc_itable; ; ii++) { /* * Allocate space for ii_index. We might allocate more then * we use. 
*/ ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), M_DEVBUF, M_WAITOK); /* * Locate the smallest of the remaining components */ smallci = NULL; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { if (ci->ci_size > size && (smallci == NULL || ci->ci_size < smallci->ci_size)) { smallci = ci; } } /* * Nobody left, all done */ if (smallci == NULL) { ii->ii_ndisk = 0; break; } /* * Record starting logical block using an sc_ileave blocksize. */ ii->ii_startblk = bn / cs->sc_ileave; /* * Record starting comopnent block using an sc_ileave * blocksize. This value is relative to the beginning of * a component disk. */ ii->ii_startoff = lbn; /* * Determine how many disks take part in this interleave * and record their indices. */ ix = 0; for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { if (ci->ci_size >= smallci->ci_size) { ii->ii_index[ix++] = ci - cs->sc_cinfo; } } ii->ii_ndisk = ix; bn += ix * (smallci->ci_size - size); lbn = smallci->ci_size / cs->sc_ileave; size = smallci->ci_size; } #ifdef DEBUG if (ccddebug & CCDB_INIT) printiinfo(cs->sc_itable); #endif } /* ARGSUSED */ static int ccdopen(dev, flags, fmt, p) dev_t dev; int flags, fmt; struct proc *p; { int unit = ccdunit(dev); struct ccd_softc *cs; struct disklabel *lp; int error = 0, part, pmask; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdopen(%x, %x)\n", dev, flags); #endif if (unit >= numccd) return (ENXIO); cs = &ccd_softc[unit]; if ((error = ccdlock(cs)) != 0) return (error); lp = &cs->sc_label; part = ccdpart(dev); pmask = (1 << part); /* * If we're initialized, check to see if there are any other * open partitions. If not, then it's safe to update * the in-core disklabel. */ if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) ccdgetdisklabel(dev); /* Check that the partition exists. 
*/ if (part != RAW_PART && ((part >= lp->d_npartitions) || (lp->d_partitions[part].p_fstype == FS_UNUSED))) { error = ENXIO; goto done; } cs->sc_openmask |= pmask; done: ccdunlock(cs); return (0); } /* ARGSUSED */ static int ccdclose(dev, flags, fmt, p) dev_t dev; int flags, fmt; struct proc *p; { int unit = ccdunit(dev); struct ccd_softc *cs; int error = 0, part; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdclose(%x, %x)\n", dev, flags); #endif if (unit >= numccd) return (ENXIO); cs = &ccd_softc[unit]; if ((error = ccdlock(cs)) != 0) return (error); part = ccdpart(dev); /* ...that much closer to allowing unconfiguration... */ cs->sc_openmask &= ~(1 << part); ccdunlock(cs); return (0); } static void ccdstrategy(bp) struct bio *bp; { int unit = ccdunit(bp->bio_dev); struct ccd_softc *cs = &ccd_softc[unit]; int s; int wlabel; struct disklabel *lp; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdstrategy(%x): unit %d\n", bp, unit); #endif if ((cs->sc_flags & CCDF_INITED) == 0) { bp->bio_error = ENXIO; bp->bio_flags |= BIO_ERROR; goto done; } /* If it's a nil transfer, wake up the top half now. */ if (bp->bio_bcount == 0) goto done; lp = &cs->sc_label; /* * Do bounds checking and adjust transfer. If there's an * error, the bounds check will flag that for us. */ wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); if (ccdpart(bp->bio_dev) != RAW_PART) { if (bounds_check_with_label(bp, lp, wlabel) <= 0) goto done; } else { int pbn; /* in sc_secsize chunks */ long sz; /* in sc_secsize chunks */ pbn = bp->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); sz = howmany(bp->bio_bcount, cs->sc_geom.ccg_secsize); /* * If out of bounds return an error. If at the EOF point, * simply read or write less. */ if (pbn < 0 || pbn >= cs->sc_size) { bp->bio_resid = bp->bio_bcount; if (pbn != cs->sc_size) { bp->bio_error = EINVAL; bp->bio_flags |= BIO_ERROR; } goto done; } /* * If the request crosses EOF, truncate the request. 
*/ if (pbn + sz > cs->sc_size) { bp->bio_bcount = (cs->sc_size - pbn) * cs->sc_geom.ccg_secsize; } } bp->bio_resid = bp->bio_bcount; /* * "Start" the unit. */ s = splbio(); ccdstart(cs, bp); splx(s); return; done: biodone(bp); } static void ccdstart(cs, bp) struct ccd_softc *cs; struct bio *bp; { long bcount, rcount; struct ccdbuf *cbp[4]; /* XXX! : 2 reads and 2 writes for RAID 4/5 */ caddr_t addr; daddr_t bn; struct partition *pp; #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdstart(%x, %x)\n", cs, bp); #endif /* Record the transaction start */ devstat_start_transaction(&cs->device_stats); /* * Translate the partition-relative block number to an absolute. */ bn = bp->bio_blkno; if (ccdpart(bp->bio_dev) != RAW_PART) { pp = &cs->sc_label.d_partitions[ccdpart(bp->bio_dev)]; bn += pp->p_offset; } /* * Allocate component buffers and fire off the requests */ addr = bp->bio_data; for (bcount = bp->bio_bcount; bcount > 0; bcount -= rcount) { ccdbuffer(cbp, cs, bp, bn, addr, bcount); rcount = cbp[0]->cb_buf.bio_bcount; if (cs->sc_cflags & CCDF_MIRROR) { /* * Mirroring. Writes go to both disks, reads are * taken from whichever disk seems most appropriate. * * We attempt to localize reads to the disk whos arm * is nearest the read request. We ignore seeks due * to writes when making this determination and we * also try to avoid hogging. */ if (cbp[0]->cb_buf.bio_cmd == BIO_WRITE) { BIO_STRATEGY(&cbp[0]->cb_buf, 0); BIO_STRATEGY(&cbp[1]->cb_buf, 0); } else { int pick = cs->sc_pick; daddr_t range = cs->sc_size / 16; if (bn < cs->sc_blk[pick] - range || bn > cs->sc_blk[pick] + range ) { cs->sc_pick = pick = 1 - pick; } cs->sc_blk[pick] = bn + btodb(rcount); BIO_STRATEGY(&cbp[pick]->cb_buf, 0); } } else { /* * Not mirroring */ BIO_STRATEGY(&cbp[0]->cb_buf, 0); } bn += btodb(rcount); addr += rcount; } } /* * Build a component buffer header. 
*/ static void ccdbuffer(cb, cs, bp, bn, addr, bcount) struct ccdbuf **cb; struct ccd_softc *cs; struct bio *bp; daddr_t bn; caddr_t addr; long bcount; { struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ struct ccdbuf *cbp; daddr_t cbn, cboff; off_t cbc; #ifdef DEBUG if (ccddebug & CCDB_IO) printf("ccdbuffer(%x, %x, %d, %x, %d)\n", cs, bp, bn, addr, bcount); #endif /* * Determine which component bn falls in. */ cbn = bn; cboff = 0; if (cs->sc_ileave == 0) { /* * Serially concatenated and neither a mirror nor a parity * config. This is a special case. */ daddr_t sblk; sblk = 0; for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) sblk += ci->ci_size; cbn -= sblk; } else { struct ccdiinfo *ii; int ccdisk, off; /* * Calculate cbn, the logical superblock (sc_ileave chunks), * and cboff, a normal block offset (DEV_BSIZE chunks) relative * to cbn. */ cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ /* * Figure out which interleave table to use. */ for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { if (ii->ii_startblk > cbn) break; } ii--; /* * off is the logical superblock relative to the beginning * of this interleave block. */ off = cbn - ii->ii_startblk; /* * We must calculate which disk component to use (ccdisk), * and recalculate cbn to be the superblock relative to * the beginning of the component. This is typically done by * adding 'off' and ii->ii_startoff together. However, 'off' * must typically be divided by the number of components in * this interleave array to be properly convert it from a * CCD-relative logical superblock number to a * component-relative superblock number. */ if (ii->ii_ndisk == 1) { /* * When we have just one disk, it can't be a mirror * or a parity config. */ ccdisk = ii->ii_index[0]; cbn = ii->ii_startoff + off; } else { if (cs->sc_cflags & CCDF_MIRROR) { /* * We have forced a uniform mapping, resulting * in a single interleave array. 
We double * up on the first half of the available * components and our mirror is in the second * half. This only works with a single * interleave array because doubling up * doubles the number of sectors, so there * cannot be another interleave array because * the next interleave array's calculations * would be off. */ int ndisk2 = ii->ii_ndisk / 2; ccdisk = ii->ii_index[off % ndisk2]; cbn = ii->ii_startoff + off / ndisk2; ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; } else if (cs->sc_cflags & CCDF_PARITY) { /* * XXX not implemented yet */ int ndisk2 = ii->ii_ndisk - 1; ccdisk = ii->ii_index[off % ndisk2]; cbn = ii->ii_startoff + off / ndisk2; if (cbn % ii->ii_ndisk <= ccdisk) ccdisk++; } else { ccdisk = ii->ii_index[off % ii->ii_ndisk]; cbn = ii->ii_startoff + off / ii->ii_ndisk; } } ci = &cs->sc_cinfo[ccdisk]; /* * Convert cbn from a superblock to a normal block so it * can be used to calculate (along with cboff) the normal * block index into this particular disk. */ cbn *= cs->sc_ileave; } /* * Fill in the component buf structure. */ cbp = getccdbuf(NULL); cbp->cb_buf.bio_cmd = bp->bio_cmd; cbp->cb_buf.bio_done = ccdiodone; cbp->cb_buf.bio_dev = ci->ci_dev; /* XXX */ cbp->cb_buf.bio_blkno = cbn + cboff + CCD_OFFSET; cbp->cb_buf.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); cbp->cb_buf.bio_data = addr; if (cs->sc_ileave == 0) cbc = dbtob((off_t)(ci->ci_size - cbn)); else cbc = dbtob((off_t)(cs->sc_ileave - cboff)); cbp->cb_buf.bio_bcount = (cbc < bcount) ? cbc : bcount; cbp->cb_buf.bio_caller1 = (void*)cbp->cb_buf.bio_bcount; /* * context for ccdiodone */ cbp->cb_obp = bp; cbp->cb_unit = cs - ccd_softc; cbp->cb_comp = ci - cs->sc_cinfo; #ifdef DEBUG if (ccddebug & CCDB_IO) printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount); #endif cb[0] = cbp; /* * Note: both I/O's setup when reading from mirror, but only one * will be executed. 
*/ if (cs->sc_cflags & CCDF_MIRROR) { /* mirror, setup second I/O */ cbp = getccdbuf(cb[0]); cbp->cb_buf.bio_dev = ci2->ci_dev; cbp->cb_comp = ci2 - cs->sc_cinfo; cb[1] = cbp; /* link together the ccdbuf's and clear "mirror done" flag */ cb[0]->cb_mirror = cb[1]; cb[1]->cb_mirror = cb[0]; cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; } } static void ccdintr(cs, bp) struct ccd_softc *cs; struct bio *bp; { #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdintr(%x, %x)\n", cs, bp); #endif /* * Request is done for better or worse, wakeup the top half. */ if (bp->bio_flags & BIO_ERROR) bp->bio_resid = bp->bio_bcount; devstat_end_transaction_bio(&cs->device_stats, bp); biodone(bp); } /* * Called at interrupt time. * Mark the component as done and if all components are done, * take a ccd interrupt. */ static void ccdiodone(ibp) struct bio *ibp; { struct ccdbuf *cbp = (struct ccdbuf *)ibp; struct bio *bp = cbp->cb_obp; int unit = cbp->cb_unit; int count, s; s = splbio(); #ifdef DEBUG if (ccddebug & CCDB_FOLLOW) printf("ccdiodone(%x)\n", cbp); if (ccddebug & CCDB_IO) { printf("ccdiodone: bp %x bcount %d resid %d\n", bp, bp->bio_bcount, bp->bio_resid); printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", cbp->cb_buf.bio_dev, cbp->cb_comp, cbp, cbp->cb_buf.bio_blkno, cbp->cb_buf.bio_data, cbp->cb_buf.bio_bcount); } #endif /* * If an error occured, report it. If this is a mirrored * configuration and the first of two possible reads, do not * set the error in the bp yet because the second read may * succeed. */ if (cbp->cb_buf.bio_flags & BIO_ERROR) { const char *msg = ""; if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && (cbp->cb_buf.bio_cmd == BIO_READ) && (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { /* * We will try our read on the other disk down * below, also reverse the default pick so if we * are doing a scan we do not keep hitting the * bad disk first. 
*/ struct ccd_softc *cs = &ccd_softc[unit]; msg = ", trying other disk"; cs->sc_pick = 1 - cs->sc_pick; cs->sc_blk[cs->sc_pick] = bp->bio_blkno; } else { bp->bio_flags |= BIO_ERROR; bp->bio_error = cbp->cb_buf.bio_error ? cbp->cb_buf.bio_error : EIO; } printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", unit, bp->bio_error, cbp->cb_comp, (int)cbp->cb_buf.bio_blkno, bp->bio_blkno, msg); } /* * Process mirror. If we are writing, I/O has been initiated on both * buffers and we fall through only after both are finished. * * If we are reading only one I/O is initiated at a time. If an * error occurs we initiate the second I/O and return, otherwise * we free the second I/O without initiating it. */ if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { if (cbp->cb_buf.bio_cmd == BIO_WRITE) { /* * When writing, handshake with the second buffer * to determine when both are done. If both are not * done, return here. */ if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; putccdbuf(cbp); splx(s); return; } } else { /* * When reading, either dispose of the second buffer * or initiate I/O on the second buffer if an error * occured with this one. */ if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { if (cbp->cb_buf.bio_flags & BIO_ERROR) { cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; BIO_STRATEGY(&cbp->cb_mirror->cb_buf, 0); putccdbuf(cbp); splx(s); return; } else { putccdbuf(cbp->cb_mirror); /* fall through */ } } } } /* * use bio_caller1 to determine how big the original request was rather * then bio_bcount, because bio_bcount may have been truncated for EOF. * * XXX We check for an error, but we do not test the resid for an * aligned EOF condition. This may result in character & block * device access not recognizing EOF properly when read or written * sequentially, but will not effect filesystems. */ count = (long)cbp->cb_buf.bio_caller1; putccdbuf(cbp); /* * If all done, "interrupt". 
*/ bp->bio_resid -= count; if (bp->bio_resid < 0) panic("ccdiodone: count"); if (bp->bio_resid == 0) ccdintr(&ccd_softc[unit], bp); splx(s); } static int ccdioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { int unit = ccdunit(dev); int i, j, lookedup = 0, error = 0; int part, pmask, s; struct ccd_softc *cs; struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; struct ccddevice ccd; char **cpp; struct vnode **vpp; - struct ucred *uc; if (unit >= numccd) return (ENXIO); cs = &ccd_softc[unit]; bzero(&ccd, sizeof(ccd)); switch (cmd) { case CCDIOCSET: if (cs->sc_flags & CCDF_INITED) return (EBUSY); if ((flag & FWRITE) == 0) return (EBADF); if ((error = ccdlock(cs)) != 0) return (error); /* Fill in some important bits. */ ccd.ccd_unit = unit; ccd.ccd_interleave = ccio->ccio_ileave; if (ccd.ccd_interleave == 0 && ((ccio->ccio_flags & CCDF_MIRROR) || (ccio->ccio_flags & CCDF_PARITY))) { printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); } if ((ccio->ccio_flags & CCDF_MIRROR) && (ccio->ccio_flags & CCDF_PARITY)) { printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); ccio->ccio_flags &= ~CCDF_PARITY; } if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && !(ccio->ccio_flags & CCDF_UNIFORM)) { printf("ccd%d: mirror/parity forces uniform flag\n", unit); ccio->ccio_flags |= CCDF_UNIFORM; } ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; /* * Allocate space for and copy in the array of * componet pathnames and device numbers. 
*/ cpp = malloc(ccio->ccio_ndisks * sizeof(char *), M_DEVBUF, M_WAITOK); vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), M_DEVBUF, M_WAITOK); error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, ccio->ccio_ndisks * sizeof(char **)); if (error) { free(vpp, M_DEVBUF); free(cpp, M_DEVBUF); ccdunlock(cs); return (error); } #ifdef DEBUG if (ccddebug & CCDB_INIT) for (i = 0; i < ccio->ccio_ndisks; ++i) printf("ccdioctl: component %d: 0x%x\n", i, cpp[i]); #endif for (i = 0; i < ccio->ccio_ndisks; ++i) { #ifdef DEBUG if (ccddebug & CCDB_INIT) printf("ccdioctl: lookedup = %d\n", lookedup); #endif if ((error = ccdlookup(cpp[i], p, &vpp[i])) != 0) { - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (j = 0; j < lookedup; ++j) (void)vn_close(vpp[j], FREAD|FWRITE, - uc, p); + p->p_ucred, p); free(vpp, M_DEVBUF); free(cpp, M_DEVBUF); ccdunlock(cs); - crfree(uc); return (error); } ++lookedup; } ccd.ccd_cpp = cpp; ccd.ccd_vpp = vpp; ccd.ccd_ndev = ccio->ccio_ndisks; /* * Initialize the ccd. Fills in the softc for us. */ if ((error = ccdinit(&ccd, cpp, p)) != 0) { - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (j = 0; j < lookedup; ++j) - (void)vn_close(vpp[j], FREAD|FWRITE, uc, p); + (void)vn_close(vpp[j], FREAD|FWRITE, + p->p_ucred, p); bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); free(vpp, M_DEVBUF); free(cpp, M_DEVBUF); ccdunlock(cs); - crfree(uc); return (error); } /* * The ccd has been successfully initialized, so * we can place it into the array and read the disklabel. 
*/ bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); ccio->ccio_unit = unit; ccio->ccio_size = cs->sc_size; ccdgetdisklabel(dev); ccdunlock(cs); break; case CCDIOCCLR: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); if ((flag & FWRITE) == 0) return (EBADF); if ((error = ccdlock(cs)) != 0) return (error); /* Don't unconfigure if any other partitions are open */ part = ccdpart(dev); pmask = (1 << part); if ((cs->sc_openmask & ~pmask)) { ccdunlock(cs); return (EBUSY); } /* * Free ccd_softc information and clear entry. */ /* Close the components and free their pathnames. */ - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); for (i = 0; i < cs->sc_nccdisks; ++i) { /* * XXX: this close could potentially fail and * cause Bad Things. Maybe we need to force * the close to happen? */ #ifdef DEBUG if (ccddebug & CCDB_VNODE) vprint("CCDIOCCLR: vnode info", cs->sc_cinfo[i].ci_vp); #endif (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, - uc, p); + p->p_ucred, p); free(cs->sc_cinfo[i].ci_path, M_DEVBUF); } - crfree(uc); /* Free interleave index. */ for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) free(cs->sc_itable[i].ii_index, M_DEVBUF); /* Free component info and interleave table. */ free(cs->sc_cinfo, M_DEVBUF); free(cs->sc_itable, M_DEVBUF); cs->sc_flags &= ~CCDF_INITED; /* * Free ccddevice information and clear entry. */ free(ccddevs[unit].ccd_cpp, M_DEVBUF); free(ccddevs[unit].ccd_vpp, M_DEVBUF); ccd.ccd_dk = -1; bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); /* * And remove the devstat entry. */ devstat_remove_entry(&cs->device_stats); /* This must be atomic. 
*/ s = splhigh(); ccdunlock(cs); bzero(cs, sizeof(struct ccd_softc)); splx(s); break; case DIOCGDINFO: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); *(struct disklabel *)data = cs->sc_label; break; case DIOCGPART: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); ((struct partinfo *)data)->disklab = &cs->sc_label; ((struct partinfo *)data)->part = &cs->sc_label.d_partitions[ccdpart(dev)]; break; case DIOCWDINFO: case DIOCSDINFO: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); if ((flag & FWRITE) == 0) return (EBADF); if ((error = ccdlock(cs)) != 0) return (error); cs->sc_flags |= CCDF_LABELLING; error = setdisklabel(&cs->sc_label, (struct disklabel *)data, 0); if (error == 0) { if (cmd == DIOCWDINFO) error = writedisklabel(CCDLABELDEV(dev), &cs->sc_label); } cs->sc_flags &= ~CCDF_LABELLING; ccdunlock(cs); if (error) return (error); break; case DIOCWLABEL: if ((cs->sc_flags & CCDF_INITED) == 0) return (ENXIO); if ((flag & FWRITE) == 0) return (EBADF); if (*(int *)data != 0) cs->sc_flags |= CCDF_WLABEL; else cs->sc_flags &= ~CCDF_WLABEL; break; default: return (ENOTTY); } return (0); } static int ccdsize(dev) dev_t dev; { struct ccd_softc *cs; int part, size; if (ccdopen(dev, 0, S_IFCHR, curproc)) return (-1); cs = &ccd_softc[ccdunit(dev)]; part = ccdpart(dev); if ((cs->sc_flags & CCDF_INITED) == 0) return (-1); if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) size = -1; else size = cs->sc_label.d_partitions[part].p_size; if (ccdclose(dev, 0, S_IFCHR, curproc)) return (-1); return (size); } static int ccddump(dev) dev_t dev; { /* Not implemented. */ return ENXIO; } /* * Lookup the provided name in the filesystem. If the file exists, * is a valid block device, and isn't being used by anyone else, * set *vpp to the file's vnode. 
*/ static int ccdlookup(path, p, vpp) char *path; struct proc *p; struct vnode **vpp; /* result */ { struct nameidata nd; struct vnode *vp; - struct ucred *uc; int error, flags; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, path, p); flags = FREAD | FWRITE; if ((error = vn_open(&nd, &flags, 0)) != 0) { #ifdef DEBUG if (ccddebug & CCDB_FOLLOW|CCDB_INIT) printf("ccdlookup: vn_open error = %d\n", error); #endif return (error); } vp = nd.ni_vp; if (vp->v_usecount > 1) { error = EBUSY; goto bad; } if (!vn_isdisk(vp, &error)) goto bad; #ifdef DEBUG if (ccddebug & CCDB_VNODE) vprint("ccdlookup: vnode info", vp); #endif VOP_UNLOCK(vp, 0, p); NDFREE(&nd, NDF_ONLY_PNBUF); *vpp = vp; return (0); bad: VOP_UNLOCK(vp, 0, p); NDFREE(&nd, NDF_ONLY_PNBUF); /* vn_close does vrele() for vp */ - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - (void)vn_close(vp, FREAD|FWRITE, uc, p); - crfree(uc); + (void)vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (error); } /* * Read the disklabel from the ccd. If one is not present, fake one * up. 
*/ static void ccdgetdisklabel(dev) dev_t dev; { int unit = ccdunit(dev); struct ccd_softc *cs = &ccd_softc[unit]; char *errstring; struct disklabel *lp = &cs->sc_label; struct ccdgeom *ccg = &cs->sc_geom; bzero(lp, sizeof(*lp)); lp->d_secperunit = cs->sc_size; lp->d_secsize = ccg->ccg_secsize; lp->d_nsectors = ccg->ccg_nsectors; lp->d_ntracks = ccg->ccg_ntracks; lp->d_ncylinders = ccg->ccg_ncylinders; lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); lp->d_type = DTYPE_CCD; strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); lp->d_rpm = 3600; lp->d_interleave = 1; lp->d_flags = 0; lp->d_partitions[RAW_PART].p_offset = 0; lp->d_partitions[RAW_PART].p_size = cs->sc_size; lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; lp->d_npartitions = RAW_PART + 1; lp->d_bbsize = BBSIZE; /* XXX */ lp->d_sbsize = SBSIZE; /* XXX */ lp->d_magic = DISKMAGIC; lp->d_magic2 = DISKMAGIC; lp->d_checksum = dkcksum(&cs->sc_label); /* * Call the generic disklabel extraction routine. */ errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); if (errstring != NULL) ccdmakedisklabel(cs); #ifdef DEBUG /* It's actually extremely common to have unlabeled ccds. */ if (ccddebug & CCDB_LABEL) if (errstring != NULL) printf("ccd%d: %s\n", unit, errstring); #endif } /* * Take care of things one might want to take care of in the event * that a disklabel isn't present. */ static void ccdmakedisklabel(cs) struct ccd_softc *cs; { struct disklabel *lp = &cs->sc_label; /* * For historical reasons, if there's no disklabel present * the raw partition must be marked FS_BSDFFS. */ lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); } /* * Wait interruptibly for an exclusive lock. * * XXX * Several drivers do this; it should be abstracted and made MP-safe. 
*/ static int ccdlock(cs) struct ccd_softc *cs; { int error; while ((cs->sc_flags & CCDF_LOCKED) != 0) { cs->sc_flags |= CCDF_WANTED; if ((error = tsleep(cs, PRIBIO | PCATCH, "ccdlck", 0)) != 0) return (error); } cs->sc_flags |= CCDF_LOCKED; return (0); } /* * Unlock and wake up any waiters. */ static void ccdunlock(cs) struct ccd_softc *cs; { cs->sc_flags &= ~CCDF_LOCKED; if ((cs->sc_flags & CCDF_WANTED) != 0) { cs->sc_flags &= ~CCDF_WANTED; wakeup(cs); } } #ifdef DEBUG static void printiinfo(ii) struct ccdiinfo *ii; { int ix, i; for (ix = 0; ii->ii_ndisk; ix++, ii++) { printf(" itab[%d]: #dk %d sblk %d soff %d", ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); for (i = 0; i < ii->ii_ndisk; i++) printf(" %d", ii->ii_index[i]); printf("\n"); } } #endif Index: head/sys/gnu/ext2fs/ext2_vfsops.c =================================================================== --- head/sys/gnu/ext2fs/ext2_vfsops.c (revision 71698) +++ head/sys/gnu/ext2fs/ext2_vfsops.c (revision 71699) @@ -1,1224 +1,1203 @@ /* * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 * $FreeBSD$ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext2_fhtovp __P((struct mount *, struct fid *, struct vnode **)); static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); static int ext2_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); static int ext2_reload __P((struct mount *mountp, struct ucred *cred, struct proc *p)); static int ext2_sbupdate __P((struct ufsmount *, int)); static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int ext2_unmount __P((struct mount *, int, struct proc *)); static int ext2_vget __P((struct mount *, ino_t, 
struct vnode **)); static int ext2_vptofh __P((struct vnode *, struct fid *)); static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); static struct vfsops ext2fs_vfsops = { ext2_mount, ufs_start, /* empty function */ ext2_unmount, ufs_root, /* root inode via vget */ ufs_quotactl, /* does operations associated with quotas */ ext2_statfs, ext2_sync, ext2_vget, ext2_fhtovp, ufs_check_export, ext2_vptofh, ext2_init, vfs_stduninit, vfs_stdextattrctl, }; VFS_SET(ext2fs_vfsops, ext2fs, 0); #define bsd_malloc malloc #define bsd_free free static int ext2fs_inode_hash_lock; static int ext2_check_sb_compat __P((struct ext2_super_block *es, dev_t dev, int ronly)); static int compute_sb_data __P((struct vnode * devvp, struct ext2_super_block * es, struct ext2_sb_info * fs)); #ifdef notyet static int ext2_mountroot __P((void)); /* * Called by main() when ext2fs is going to be mounted as root. * * Name is updated by mount(8) after booting. */ #define ROOTNAME "root_device" static int ext2_mountroot() { register struct ext2_sb_info *fs; register struct mount *mp; struct proc *p = curproc; struct ufsmount *ump; u_int size; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("ext2_mountroot: can't find rootvp\n"); return (error); } mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &ext2fs_vfsops; mp->mnt_flag = MNT_RDONLY; if (error = ext2_mountfs(rootvp, mp, p)) { bsd_free(mp, M_MOUNT); return (error); } if (error = vfs_lock(mp)) { (void)ext2_unmount(mp, 0, p); bsd_free(mp, M_MOUNT); return (error); } TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); mp->mnt_flag |= MNT_ROOTFS; mp->mnt_vnodecovered = NULLVP; ump = VFSTOUFS(mp); fs = ump->um_e2fs; bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); fs->fs_fsmnt[0] = '/'; bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); inittodr(fs->s_es->s_wtime); /* this helps to set the time */ return (0); } #endif /* * VFS Operations. * * mount system call */ static int ext2_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; /* this is actually a (struct ufs_args *) */ struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; - struct ucred *uc; register struct ext2_sb_info *fs; size_t size; int error, flags; mode_t accessmode; if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0) return (error); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_e2fs; error = 0; if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp, LK_NOWAIT, 0, p)) return (EBUSY); error = ext2_flushfiles(mp, flags, p); vfs_unbusy(mp, p); if (!error && fs->s_wasvalid) { fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } fs->s_rd_only = 1; } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); if (error) return (error); devvp = ump->um_devvp; if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) return (EPERM); if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. 
*/ if (suser(p)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, - uc, p)) != 0) { - crfree(uc); + p->p_ucred, p)) != 0) { VOP_UNLOCK(devvp, 0, p); return (error); } - crfree(uc); VOP_UNLOCK(devvp, 0, p); } if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || (fs->s_es->s_state & EXT2_ERROR_FS)) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); return (EPERM); } } fs->s_es->s_state &= ~EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); fs->s_rd_only = 0; } if (args.fspec == 0) { /* * Process export requests. */ return (vfs_export(mp, &ump->um_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp)) != 0) return (error); NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (!vn_isdisk(devvp, &error)) { vrele(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. 
*/ if (suser(p)) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = VOP_ACCESS(devvp, accessmode, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { vput(devvp); return (error); } - crfree(uc); VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = ext2_mountfs(devvp, mp, p); } else { if (devvp != ump->um_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } ump = VFSTOUFS(mp); fs = ump->um_e2fs; (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); return (0); } /* * checks that the data in the descriptor blocks make sense * this is taken from ext2/super.c */ static int ext2_check_descriptors (struct ext2_sb_info * sb) { int i; int desc_block = 0; unsigned long block = sb->s_es->s_first_data_block; struct ext2_group_desc * gdp = NULL; /* ext2_debug ("Checking group descriptors"); */ for (i = 0; i < sb->s_groups_count; i++) { /* examine next descriptor block */ if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext2_group_desc *) sb->s_group_desc[desc_block++]->b_data; if (gdp->bg_block_bitmap < block || gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Block bitmap for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_block_bitmap); return 0; } if (gdp->bg_inode_bitmap < block || gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode bitmap for 
group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_bitmap); return 0; } if (gdp->bg_inode_table < block || gdp->bg_inode_table + sb->s_itb_per_group >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode table for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_table); return 0; } block += EXT2_BLOCKS_PER_GROUP(sb); gdp++; } return 1; } static int ext2_check_sb_compat(es, dev, ronly) struct ext2_super_block *es; dev_t dev; int ronly; { if (es->s_magic != EXT2_SUPER_MAGIC) { printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); return (1); } if (es->s_rev_level > EXT2_GOOD_OLD_REV) { if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { printf( "WARNING: mount of %s denied due to unsupported optional features\n", devtoname(dev)); return (1); } if (!ronly && (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { printf( "WARNING: R/W mount of %s denied due to unsupported optional features\n", devtoname(dev)); return (1); } } return (0); } /* * this computes the fields of the ext2_sb_info structure from the * data in the ext2_super_block structure read in */ static int compute_sb_data(devvp, es, fs) struct vnode * devvp; struct ext2_super_block * es; struct ext2_sb_info * fs; { int db_count, error; int i, j; int logic_sb_block = 1; /* XXX for now */ #if 1 #define V(v) #else #define V(v) printf(#v"= %d\n", fs->v); #endif fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; V(s_blocksize) fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; V(s_bshift) fs->s_fsbtodb = es->s_log_block_size + 1; V(s_fsbtodb) fs->s_qbmask = fs->s_blocksize - 1; V(s_bmask) fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); V(s_blocksize_bits) fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; V(s_frag_size) if (fs->s_frag_size) fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; V(s_frags_per_block) 
fs->s_blocks_per_group = es->s_blocks_per_group; V(s_blocks_per_group) fs->s_frags_per_group = es->s_frags_per_group; V(s_frags_per_group) fs->s_inodes_per_group = es->s_inodes_per_group; V(s_inodes_per_group) fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; V(s_inodes_per_block) fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; V(s_itb_per_group) fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); V(s_desc_per_block) /* s_resuid / s_resgid ? */ fs->s_groups_count = (es->s_blocks_count - es->s_first_data_block + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); V(s_groups_count) db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / EXT2_DESC_PER_BLOCK(fs); fs->s_db_per_group = db_count; V(s_db_per_group) fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), M_UFSMNT, M_WAITOK); /* adjust logic_sb_block */ if(fs->s_blocksize > SBSIZE) /* Godmar thinks: if the blocksize is greater than 1024, then the superblock is logically part of block zero. 
*/ logic_sb_block = 0; for (i = 0; i < db_count; i++) { error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); if(error) { for (j = 0; j < i; j++) brelse(fs->s_group_desc[j]); bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: unable to read group descriptors (%d)\n", error); return EIO; } /* Set the B_LOCKED flag on the buffer, then brelse() it */ LCK_BUF(fs->s_group_desc[i]) } if(!ext2_check_descriptors(fs)) { for (j = 0; j < db_count; j++) ULCK_BUF(fs->s_group_desc[j]) bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: (ext2_check_descriptors failure) " "unable to read group descriptors\n"); return EIO; } for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { fs->s_inode_bitmap_number[i] = 0; fs->s_inode_bitmap[i] = NULL; fs->s_block_bitmap_number[i] = 0; fs->s_block_bitmap[i] = NULL; } fs->s_loaded_inode_bitmaps = 0; fs->s_loaded_block_bitmaps = 0; return 0; } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ext2_reload(mountp, cred, p) register struct mount *mountp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct buf *bp; struct ext2_super_block * es; struct ext2_sb_info *fs; int error; if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mountp)->um_devvp; if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty1"); /* * Step 2: re-read superblock from disk. 
* constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2_super_block *)bp->b_data; if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mountp)->um_e2fs; bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); if((error = compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return error; } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); loop: mtx_enter(&mntvnode_mtx, MTX_DEF); for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mountp) { mtx_exit(&mntvnode_mtx, MTX_DEF); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_mtx, p)) goto loop; /* * Step 5: invalidate all cached file data. */ mtx_enter(&vp->v_interlock, MTX_DEF); mtx_exit(&mntvnode_mtx, MTX_DEF); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->s_blocksize, NOCRED, &bp); if (error) { vput(vp); return (error); } ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), &ip->i_din); brelse(bp); vput(vp); mtx_enter(&mntvnode_mtx, MTX_DEF); } mtx_exit(&mntvnode_mtx, MTX_DEF); return (0); } /* * Common code for mount and mountroot */ static int ext2_mountfs(devvp, mp, p) register struct vnode *devvp; struct mount *mp; struct proc *p; { register struct ufsmount *ump; struct buf *bp; - struct ucred *uc; register struct ext2_sb_info *fs; struct ext2_super_block * es; dev_t dev = devvp->v_rdev; struct partinfo dpart; int havepart = 0; int error, i, size; int ronly; /* * Disallow multiple mounts of the same device. 
* Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp)) != 0) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = vinvalbuf(devvp, V_SAVE, uc, p, 0, 0)) != 0) { - crfree(uc); + if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) return (error); - } - crfree(uc); #ifdef READONLY /* turn on this to force it to be read-only */ mp->mnt_flag |= MNT_RDONLY; #endif ronly = (mp->mnt_flag & MNT_RDONLY) != 0; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); VOP_UNLOCK(devvp, 0, p); if (error) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else { havepart = 1; size = dpart.disklab->d_secsize; } bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) goto out; es = (struct ext2_super_block *)bp->b_data; if (ext2_check_sb_compat(es, dev, ronly) != 0) { error = EINVAL; /* XXX needs translation */ goto out; } if ((es->s_state & EXT2_VALID_FS) == 0 || (es->s_state & EXT2_ERROR_FS)) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: Filesystem was not properly dismounted\n"); } else { printf( "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); error = EPERM; goto out; } } ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = M_EXT2NODE; ump->um_blkatoff = ext2_blkatoff; ump->um_truncate = ext2_truncate; ump->um_update = ext2_update; ump->um_valloc = ext2_valloc; ump->um_vfree = ext2_vfree; /* I don't know whether this is the right strategy. 
Note that we dynamically allocate both a ext2_sb_info and a ext2_super_block while Linux keeps the super block in a locked buffer */ ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), M_UFSMNT, M_WAITOK); ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), M_UFSMNT, M_WAITOK); bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) goto out; /* * We don't free the group descriptors allocated by compute_sb_data() * until ext2_unmount(). This is OK since the mount will succeed. */ brelse(bp); bp = NULL; fs = ump->um_e2fs; fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ /* if the fs is not mounted read-only, make sure the super block is always written back on a sync() */ fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; if (ronly == 0) { fs->s_dirt = 1; /* mark it modified */ fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; /* setting those two parameters allows us to use ufs_bmap w/o changse ! */ ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; devvp->v_rdev->si_mountpoint = mp; if (ronly == 0) ext2_sbupdate(ump, MNT_WAIT); return (0); out: if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); if (ump) { bsd_free(ump->um_e2fs->s_es, M_UFSMNT); bsd_free(ump->um_e2fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * unmount system call */ static int ext2_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct ext2_sb_info *fs; int error, flags, ronly, i; flags = 0; if (mntflags & MNT_FORCE) { if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); flags |= FORCECLOSE; } if ((error = ext2_flushfiles(mp, flags, p)) != 0) return (error); ump = VFSTOUFS(mp); fs = ump->um_e2fs; ronly = fs->s_rd_only; if (ronly == 0) { if (fs->s_wasvalid) fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } /* release buffers containing group descriptors */ for(i = 0; i < fs->s_db_per_group; i++) ULCK_BUF(fs->s_group_desc[i]) bsd_free(fs->s_group_desc, M_UFSMNT); /* release cached inode/block bitmaps */ for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_inode_bitmap[i]) ULCK_BUF(fs->s_inode_bitmap[i]) for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_block_bitmap[i]) ULCK_BUF(fs->s_block_bitmap[i]) ump->um_devvp->v_rdev->si_mountpoint = NULL; error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); bsd_free(fs->s_es, M_UFSMNT); bsd_free(fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ static int ext2_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; #if QUOTA int i; #endif ump = VFSTOUFS(mp); #if QUOTA if (mp->mnt_flag & MNT_QUOTA) { if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. 
*/ } #endif error = vflush(mp, NULLVP, flags); return (error); } /* * Get file system statistics. * taken from ext2/super.c ext2_statfs */ static int ext2_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { unsigned long overhead; register struct ufsmount *ump; register struct ext2_sb_info *fs; register struct ext2_super_block *es; int i, nsb; ump = VFSTOUFS(mp); fs = ump->um_e2fs; es = fs->s_es; if (es->s_magic != EXT2_SUPER_MAGIC) panic("ext2_statfs - magic number spoiled"); /* * Compute the overhead (FS structures) */ if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { nsb = 0; for (i = 0 ; i < fs->s_groups_count; i++) if (ext2_group_sparse(i)) nsb++; } else nsb = fs->s_groups_count; overhead = es->s_first_data_block + /* Superblocks and block group descriptors: */ nsb * (1 + fs->s_db_per_group) + /* Inode bitmap, block bitmap, and inode table: */ fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); sbp->f_blocks = es->s_blocks_count - overhead; sbp->f_bfree = es->s_free_blocks_count; sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; sbp->f_files = es->s_inodes_count; sbp->f_ffree = es->s_free_inodes_count; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. 
*/ static int ext2_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct ext2_sb_info *fs; int error, allerror = 0; fs = ump->um_e2fs; if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ext2_sync: rofs mod"); } /* * Write back each (modified) inode. */ mtx_enter(&mntvnode_mtx, MTX_DEF); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; mtx_enter(&vp->v_interlock, MTX_DEF); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { mtx_exit(&vp->v_interlock, MTX_DEF); continue; } mtx_exit(&mntvnode_mtx, MTX_DEF); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { mtx_enter(&mntvnode_mtx, MTX_DEF); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); mtx_enter(&mntvnode_mtx, MTX_DEF); } mtx_exit(&mntvnode_mtx, MTX_DEF); /* * Force stale file system control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0, p); } #if QUOTA qsync(mp); #endif /* * Write back modified superblock. */ if (fs->s_dirt != 0) { fs->s_dirt = 0; fs->s_es->s_wtime = time_second; if ((error = ext2_sbupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); } /* * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. 
Detection and handling of mount points must be * done by the calling routine. */ static int ext2_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { register struct ext2_sb_info *fs; register struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int i, error; int used_blocks; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ext2fs_inode_hash_lock) { while (ext2fs_inode_hash_lock) { ext2fs_inode_hash_lock = -1; tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); } goto restart; } ext2fs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ext2_sync() if a sync happens to fire right then, * which will cause a panic because ext2_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); /* Allocate a new vnode/inode. */ if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) { if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, M_EXT2NODE); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&vp->v_lock, PINOD, "ext2in", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_dev = dev; ip->i_number = ino; #if QUOTA for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. 
*/ ufs_ihashins(ip); if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ #if 0 printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); #endif if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->s_blocksize, NOCRED, &bp)) != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ vput(vp); brelse(bp); *vpp = NULL; return (error); } /* convert ext2 inode to dinode */ ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ino)), &ip->i_din); ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; ip->i_prealloc_count = 0; ip->i_prealloc_block = 0; /* now we want to make sure that block pointers for unused blocks are zeroed out - ext2_balloc depends on this although for regular files and directories only */ if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; } /* ext2_print_inode(ip); */ brelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. 
*/ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ext2_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ext2_fhtovp(mp, fhp, vpp) register struct mount *mp; struct fid *fhp; struct vnode **vpp; { register struct ufid *ufhp; struct ext2_sb_info *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_e2fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) return (ESTALE); return (ufs_fhtovp(mp, ufhp, vpp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ static int ext2_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Write a superblock and associated information back to disk. */ static int ext2_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct ext2_sb_info *fs = mp->um_e2fs; register struct ext2_super_block *es = fs->s_es; register struct buf *bp; int error = 0; /* printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); */ bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); /* * The buffers for group descriptors, inode bitmaps and block bitmaps * are not busy at this point and are (hopefully) written by the * usual sync mechanism. 
No need to write them here */ return (error); } Index: head/sys/gnu/fs/ext2fs/ext2_vfsops.c =================================================================== --- head/sys/gnu/fs/ext2fs/ext2_vfsops.c (revision 71698) +++ head/sys/gnu/fs/ext2fs/ext2_vfsops.c (revision 71699) @@ -1,1224 +1,1203 @@ /* * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 * $FreeBSD$ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext2_fhtovp __P((struct mount *, struct fid *, struct vnode **)); static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); static int ext2_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); static int ext2_reload __P((struct mount *mountp, struct ucred *cred, struct proc *p)); static int ext2_sbupdate __P((struct ufsmount *, int)); static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int ext2_unmount __P((struct mount *, int, struct proc *)); static int ext2_vget __P((struct mount *, ino_t, struct vnode **)); static int ext2_vptofh __P((struct vnode *, struct fid *)); static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); static struct vfsops ext2fs_vfsops = { ext2_mount, ufs_start, /* empty function */ ext2_unmount, ufs_root, /* root inode via vget */ ufs_quotactl, /* does operations associated with quotas */ ext2_statfs, ext2_sync, ext2_vget, ext2_fhtovp, ufs_check_export, ext2_vptofh, 
ext2_init, vfs_stduninit, vfs_stdextattrctl, }; VFS_SET(ext2fs_vfsops, ext2fs, 0); #define bsd_malloc malloc #define bsd_free free static int ext2fs_inode_hash_lock; static int ext2_check_sb_compat __P((struct ext2_super_block *es, dev_t dev, int ronly)); static int compute_sb_data __P((struct vnode * devvp, struct ext2_super_block * es, struct ext2_sb_info * fs)); #ifdef notyet static int ext2_mountroot __P((void)); /* * Called by main() when ext2fs is going to be mounted as root. * * Name is updated by mount(8) after booting. */ #define ROOTNAME "root_device" static int ext2_mountroot() { register struct ext2_sb_info *fs; register struct mount *mp; struct proc *p = curproc; struct ufsmount *ump; u_int size; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("ext2_mountroot: can't find rootvp\n"); return (error); } mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &ext2fs_vfsops; mp->mnt_flag = MNT_RDONLY; if (error = ext2_mountfs(rootvp, mp, p)) { bsd_free(mp, M_MOUNT); return (error); } if (error = vfs_lock(mp)) { (void)ext2_unmount(mp, 0, p); bsd_free(mp, M_MOUNT); return (error); } TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list); mp->mnt_flag |= MNT_ROOTFS; mp->mnt_vnodecovered = NULLVP; ump = VFSTOUFS(mp); fs = ump->um_e2fs; bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); fs->fs_fsmnt[0] = '/'; bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); inittodr(fs->s_es->s_wtime); /* this helps to set the time */ return (0); } #endif /* * VFS Operations. 
* * mount system call */ static int ext2_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; /* this is actually a (struct ufs_args *) */ struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; - struct ucred *uc; register struct ext2_sb_info *fs; size_t size; int error, flags; mode_t accessmode; if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0) return (error); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_e2fs; error = 0; if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp, LK_NOWAIT, 0, p)) return (EBUSY); error = ext2_flushfiles(mp, flags, p); vfs_unbusy(mp, p); if (!error && fs->s_wasvalid) { fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } fs->s_rd_only = 1; } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); if (error) return (error); devvp = ump->um_devvp; if (ext2_check_sb_compat(fs->s_es, devvp->v_rdev, (mp->mnt_kern_flag & MNTK_WANTRDWR) == 0) != 0) return (EPERM); if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. 
*/ if (suser(p)) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, - uc, p)) != 0) { - crfree(uc); + p->p_ucred, p)) != 0) { VOP_UNLOCK(devvp, 0, p); return (error); } - crfree(uc); VOP_UNLOCK(devvp, 0, p); } if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || (fs->s_es->s_state & EXT2_ERROR_FS)) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); return (EPERM); } } fs->s_es->s_state &= ~EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); fs->s_rd_only = 0; } if (args.fspec == 0) { /* * Process export requests. */ return (vfs_export(mp, &ump->um_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp)) != 0) return (error); NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (!vn_isdisk(devvp, &error)) { vrele(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. 
*/ if (suser(p)) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = VOP_ACCESS(devvp, accessmode, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { vput(devvp); return (error); } - crfree(uc); VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = ext2_mountfs(devvp, mp, p); } else { if (devvp != ump->um_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } ump = VFSTOUFS(mp); fs = ump->um_e2fs; (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); return (0); } /* * checks that the data in the descriptor blocks make sense * this is taken from ext2/super.c */ static int ext2_check_descriptors (struct ext2_sb_info * sb) { int i; int desc_block = 0; unsigned long block = sb->s_es->s_first_data_block; struct ext2_group_desc * gdp = NULL; /* ext2_debug ("Checking group descriptors"); */ for (i = 0; i < sb->s_groups_count; i++) { /* examine next descriptor block */ if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext2_group_desc *) sb->s_group_desc[desc_block++]->b_data; if (gdp->bg_block_bitmap < block || gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Block bitmap for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_block_bitmap); return 0; } if (gdp->bg_inode_bitmap < block || gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode bitmap for 
group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_bitmap); return 0; } if (gdp->bg_inode_table < block || gdp->bg_inode_table + sb->s_itb_per_group >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode table for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_table); return 0; } block += EXT2_BLOCKS_PER_GROUP(sb); gdp++; } return 1; } static int ext2_check_sb_compat(es, dev, ronly) struct ext2_super_block *es; dev_t dev; int ronly; { if (es->s_magic != EXT2_SUPER_MAGIC) { printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", devtoname(dev), es->s_magic, EXT2_SUPER_MAGIC); return (1); } if (es->s_rev_level > EXT2_GOOD_OLD_REV) { if (es->s_feature_incompat & ~EXT2_FEATURE_INCOMPAT_SUPP) { printf( "WARNING: mount of %s denied due to unsupported optional features\n", devtoname(dev)); return (1); } if (!ronly && (es->s_feature_ro_compat & ~EXT2_FEATURE_RO_COMPAT_SUPP)) { printf( "WARNING: R/W mount of %s denied due to unsupported optional features\n", devtoname(dev)); return (1); } } return (0); } /* * this computes the fields of the ext2_sb_info structure from the * data in the ext2_super_block structure read in */ static int compute_sb_data(devvp, es, fs) struct vnode * devvp; struct ext2_super_block * es; struct ext2_sb_info * fs; { int db_count, error; int i, j; int logic_sb_block = 1; /* XXX for now */ #if 1 #define V(v) #else #define V(v) printf(#v"= %d\n", fs->v); #endif fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; V(s_blocksize) fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; V(s_bshift) fs->s_fsbtodb = es->s_log_block_size + 1; V(s_fsbtodb) fs->s_qbmask = fs->s_blocksize - 1; V(s_bmask) fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); V(s_blocksize_bits) fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; V(s_frag_size) if (fs->s_frag_size) fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; V(s_frags_per_block) 
fs->s_blocks_per_group = es->s_blocks_per_group; V(s_blocks_per_group) fs->s_frags_per_group = es->s_frags_per_group; V(s_frags_per_group) fs->s_inodes_per_group = es->s_inodes_per_group; V(s_inodes_per_group) fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; V(s_inodes_per_block) fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; V(s_itb_per_group) fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); V(s_desc_per_block) /* s_resuid / s_resgid ? */ fs->s_groups_count = (es->s_blocks_count - es->s_first_data_block + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); V(s_groups_count) db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / EXT2_DESC_PER_BLOCK(fs); fs->s_db_per_group = db_count; V(s_db_per_group) fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), M_UFSMNT, M_WAITOK); /* adjust logic_sb_block */ if(fs->s_blocksize > SBSIZE) /* Godmar thinks: if the blocksize is greater than 1024, then the superblock is logically part of block zero. 
*/ logic_sb_block = 0; for (i = 0; i < db_count; i++) { error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); if(error) { for (j = 0; j < i; j++) brelse(fs->s_group_desc[j]); bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: unable to read group descriptors (%d)\n", error); return EIO; } /* Set the B_LOCKED flag on the buffer, then brelse() it */ LCK_BUF(fs->s_group_desc[i]) } if(!ext2_check_descriptors(fs)) { for (j = 0; j < db_count; j++) ULCK_BUF(fs->s_group_desc[j]) bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: (ext2_check_descriptors failure) " "unable to read group descriptors\n"); return EIO; } for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { fs->s_inode_bitmap_number[i] = 0; fs->s_inode_bitmap[i] = NULL; fs->s_block_bitmap_number[i] = 0; fs->s_block_bitmap[i] = NULL; } fs->s_loaded_inode_bitmaps = 0; fs->s_loaded_block_bitmaps = 0; return 0; } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ext2_reload(mountp, cred, p) register struct mount *mountp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct buf *bp; struct ext2_super_block * es; struct ext2_sb_info *fs; int error; if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mountp)->um_devvp; if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty1"); /* * Step 2: re-read superblock from disk. 
* constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2_super_block *)bp->b_data; if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mountp)->um_e2fs; bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); if((error = compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return error; } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); loop: mtx_enter(&mntvnode_mtx, MTX_DEF); for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mountp) { mtx_exit(&mntvnode_mtx, MTX_DEF); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_mtx, p)) goto loop; /* * Step 5: invalidate all cached file data. */ mtx_enter(&vp->v_interlock, MTX_DEF); mtx_exit(&mntvnode_mtx, MTX_DEF); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->s_blocksize, NOCRED, &bp); if (error) { vput(vp); return (error); } ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), &ip->i_din); brelse(bp); vput(vp); mtx_enter(&mntvnode_mtx, MTX_DEF); } mtx_exit(&mntvnode_mtx, MTX_DEF); return (0); } /* * Common code for mount and mountroot */ static int ext2_mountfs(devvp, mp, p) register struct vnode *devvp; struct mount *mp; struct proc *p; { register struct ufsmount *ump; struct buf *bp; - struct ucred *uc; register struct ext2_sb_info *fs; struct ext2_super_block * es; dev_t dev = devvp->v_rdev; struct partinfo dpart; int havepart = 0; int error, i, size; int ronly; /* * Disallow multiple mounts of the same device. 
* Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp)) != 0) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = vinvalbuf(devvp, V_SAVE, uc, p, 0, 0)) != 0) { - crfree(uc); + if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) return (error); - } - crfree(uc); #ifdef READONLY /* turn on this to force it to be read-only */ mp->mnt_flag |= MNT_RDONLY; #endif ronly = (mp->mnt_flag & MNT_RDONLY) != 0; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); VOP_UNLOCK(devvp, 0, p); if (error) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else { havepart = 1; size = dpart.disklab->d_secsize; } bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) goto out; es = (struct ext2_super_block *)bp->b_data; if (ext2_check_sb_compat(es, dev, ronly) != 0) { error = EINVAL; /* XXX needs translation */ goto out; } if ((es->s_state & EXT2_VALID_FS) == 0 || (es->s_state & EXT2_ERROR_FS)) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: Filesystem was not properly dismounted\n"); } else { printf( "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); error = EPERM; goto out; } } ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = M_EXT2NODE; ump->um_blkatoff = ext2_blkatoff; ump->um_truncate = ext2_truncate; ump->um_update = ext2_update; ump->um_valloc = ext2_valloc; ump->um_vfree = ext2_vfree; /* I don't know whether this is the right strategy. 
Note that we dynamically allocate both an ext2_sb_info and an ext2_super_block while Linux keeps the super block in a locked buffer */ ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), M_UFSMNT, M_WAITOK); ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), M_UFSMNT, M_WAITOK); bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) goto out; /* * We don't free the group descriptors allocated by compute_sb_data() * until ext2_unmount(). This is OK since the mount will succeed. */ brelse(bp); bp = NULL; fs = ump->um_e2fs; fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ /* if the fs is not mounted read-only, make sure the super block is always written back on a sync() */ fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; if (ronly == 0) { fs->s_dirt = 1; /* mark it modified */ fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; /* setting those two parameters allows us to use ufs_bmap w/o changes! */ ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; devvp->v_rdev->si_mountpoint = mp; if (ronly == 0) ext2_sbupdate(ump, MNT_WAIT); return (0); out: if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ?
FREAD : FREAD|FWRITE, NOCRED, p); if (ump) { bsd_free(ump->um_e2fs->s_es, M_UFSMNT); bsd_free(ump->um_e2fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * unmount system call */ static int ext2_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct ext2_sb_info *fs; int error, flags, ronly, i; flags = 0; if (mntflags & MNT_FORCE) { if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); flags |= FORCECLOSE; } if ((error = ext2_flushfiles(mp, flags, p)) != 0) return (error); ump = VFSTOUFS(mp); fs = ump->um_e2fs; ronly = fs->s_rd_only; if (ronly == 0) { if (fs->s_wasvalid) fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } /* release buffers containing group descriptors */ for(i = 0; i < fs->s_db_per_group; i++) ULCK_BUF(fs->s_group_desc[i]) bsd_free(fs->s_group_desc, M_UFSMNT); /* release cached inode/block bitmaps */ for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_inode_bitmap[i]) ULCK_BUF(fs->s_inode_bitmap[i]) for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_block_bitmap[i]) ULCK_BUF(fs->s_block_bitmap[i]) ump->um_devvp->v_rdev->si_mountpoint = NULL; error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); bsd_free(fs->s_es, M_UFSMNT); bsd_free(fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ static int ext2_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; #if QUOTA int i; #endif ump = VFSTOUFS(mp); #if QUOTA if (mp->mnt_flag & MNT_QUOTA) { if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. 
*/ } #endif error = vflush(mp, NULLVP, flags); return (error); } /* * Get file system statistics. * taken from ext2/super.c ext2_statfs */ static int ext2_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { unsigned long overhead; register struct ufsmount *ump; register struct ext2_sb_info *fs; register struct ext2_super_block *es; int i, nsb; ump = VFSTOUFS(mp); fs = ump->um_e2fs; es = fs->s_es; if (es->s_magic != EXT2_SUPER_MAGIC) panic("ext2_statfs - magic number spoiled"); /* * Compute the overhead (FS structures) */ if (es->s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) { nsb = 0; for (i = 0 ; i < fs->s_groups_count; i++) if (ext2_group_sparse(i)) nsb++; } else nsb = fs->s_groups_count; overhead = es->s_first_data_block + /* Superblocks and block group descriptors: */ nsb * (1 + fs->s_db_per_group) + /* Inode bitmap, block bitmap, and inode table: */ fs->s_groups_count * (1 + 1 + fs->s_itb_per_group); sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); sbp->f_blocks = es->s_blocks_count - overhead; sbp->f_bfree = es->s_free_blocks_count; sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; sbp->f_files = es->s_inodes_count; sbp->f_ffree = es->s_free_inodes_count; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. 
*/ static int ext2_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct ext2_sb_info *fs; int error, allerror = 0; fs = ump->um_e2fs; if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ext2_sync: rofs mod"); } /* * Write back each (modified) inode. */ mtx_enter(&mntvnode_mtx, MTX_DEF); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; mtx_enter(&vp->v_interlock, MTX_DEF); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { mtx_exit(&vp->v_interlock, MTX_DEF); continue; } mtx_exit(&mntvnode_mtx, MTX_DEF); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { mtx_enter(&mntvnode_mtx, MTX_DEF); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); mtx_enter(&mntvnode_mtx, MTX_DEF); } mtx_exit(&mntvnode_mtx, MTX_DEF); /* * Force stale file system control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0, p); } #if QUOTA qsync(mp); #endif /* * Write back modified superblock. */ if (fs->s_dirt != 0) { fs->s_dirt = 0; fs->s_es->s_wtime = time_second; if ((error = ext2_sbupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); } /* * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. 
Detection and handling of mount points must be * done by the calling routine. */ static int ext2_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { register struct ext2_sb_info *fs; register struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int i, error; int used_blocks; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ext2fs_inode_hash_lock) { while (ext2fs_inode_hash_lock) { ext2fs_inode_hash_lock = -1; tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); } goto restart; } ext2fs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ext2_sync() if a sync happens to fire right then, * which will cause a panic because ext2_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); /* Allocate a new vnode/inode. */ if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) { if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, M_EXT2NODE); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&vp->v_lock, PINOD, "ext2in", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_dev = dev; ip->i_number = ino; #if QUOTA for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. 
*/ ufs_ihashins(ip); if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ #if 0 printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); #endif if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->s_blocksize, NOCRED, &bp)) != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ vput(vp); brelse(bp); *vpp = NULL; return (error); } /* convert ext2 inode to dinode */ ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ino)), &ip->i_din); ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; ip->i_prealloc_count = 0; ip->i_prealloc_block = 0; /* now we want to make sure that block pointers for unused blocks are zeroed out - ext2_balloc depends on this although for regular files and directories only */ if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; } /* ext2_print_inode(ip); */ brelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. 
*/ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ext2_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ext2_fhtovp(mp, fhp, vpp) register struct mount *mp; struct fid *fhp; struct vnode **vpp; { register struct ufid *ufhp; struct ext2_sb_info *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_e2fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) return (ESTALE); return (ufs_fhtovp(mp, ufhp, vpp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ static int ext2_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Write a superblock and associated information back to disk. */ static int ext2_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct ext2_sb_info *fs = mp->um_e2fs; register struct ext2_super_block *es = fs->s_es; register struct buf *bp; int error = 0; /* printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); */ bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); /* * The buffers for group descriptors, inode bitmaps and block bitmaps * are not busy at this point and are (hopefully) written by the * usual sync mechanism. 
No need to write them here */ return (error); } Index: head/sys/i386/ibcs2/ibcs2_util.c =================================================================== --- head/sys/i386/ibcs2/ibcs2_util.c (revision 71698) +++ head/sys/i386/ibcs2/ibcs2_util.c (revision 71699) @@ -1,184 +1,177 @@ /* * Copyright (c) 1994 Christos Zoulas * Copyright (c) 1995 Frank van der Linden * Copyright (c) 1995 Scott Bartram * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * from: svr4_util.c,v 1.5 1995/01/22 23:44:50 christos Exp * $FreeBSD$ */ #include #include #include #include #include #include const char ibcs2_emul_path[] = "/compat/ibcs2"; /* * Search an alternate path before passing pathname arguments on * to system calls. Useful for keeping a seperate 'emulation tree'. * * If cflag is set, we check if an attempt can be made to create * the named file, i.e. we check if the directory it should * be in exists. */ int ibcs2_emul_find(p, sgp, prefix, path, pbuf, cflag) struct proc *p; caddr_t *sgp; /* Pointer to stackgap memory */ const char *prefix; char *path; char **pbuf; int cflag; { struct nameidata nd; struct nameidata ndroot; struct vattr vat; struct vattr vatroot; - struct ucred *uc; int error; char *ptr, *buf, *cp; size_t sz, len; buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); *pbuf = path; for (ptr = buf; (*ptr = *prefix) != '\0'; ptr++, prefix++) continue; sz = MAXPATHLEN - (ptr - buf); /* * If sgp is not given then the path is already in kernel space */ if (sgp == NULL) error = copystr(path, ptr, sz, &len); else error = copyinstr(path, ptr, sz, &len); if (error) { free(buf, M_TEMP); return error; } if (*ptr != '/') { free(buf, M_TEMP); return EINVAL; } /* * We know that there is a / somewhere in this pathname. * Search backwards for it, to find the file's parent dir * to see if it exists in the alternate tree. If it does, * and we want to create a file (cflag is set). We don't * need to worry about the root comparison in this case. */ if (cflag) { for (cp = &ptr[len] - 1; *cp != '/'; cp--); *cp = '\0'; NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, p); if ((error = namei(&nd)) != 0) { free(buf, M_TEMP); return error; } *cp = '/'; } else { NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, buf, p); if ((error = namei(&nd)) != 0) { free(buf, M_TEMP); return error; } /* * We now compare the vnode of the ibcs2_root to the one * vnode asked. 
If they resolve to be the same, then we * ignore the match so that the real root gets used. * This avoids the problem of traversing "../.." to find the * root directory and never finding it, because "/" resolves * to the emulation root directory. This is expensive :-( */ NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, ibcs2_emul_path, p); if ((error = namei(&ndroot)) != 0) { /* Cannot happen! */ free(buf, M_TEMP); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); return error; } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = VOP_GETATTR(nd.ni_vp, &vat, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_GETATTR(nd.ni_vp, &vat, p->p_ucred, p)) != 0) { goto done; } - if ((error = VOP_GETATTR(ndroot.ni_vp, &vatroot, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_GETATTR(ndroot.ni_vp, &vatroot, p->p_ucred, p)) + != 0) { goto done; } - crfree(uc); if (vat.va_fsid == vatroot.va_fsid && vat.va_fileid == vatroot.va_fileid) { error = ENOENT; goto done; } } if (sgp == NULL) *pbuf = buf; else { sz = &ptr[len] - buf; *pbuf = stackgap_alloc(sgp, sz + 1); error = copyout(buf, *pbuf, sz); free(buf, M_TEMP); } done: NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); if (!cflag) { NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); } return error; } Index: head/sys/i386/ibcs2/imgact_coff.c =================================================================== --- head/sys/i386/ibcs2/imgact_coff.c (revision 71698) +++ head/sys/i386/ibcs2/imgact_coff.c (revision 71699) @@ -1,495 +1,480 @@ /*- * Copyright (c) 1994 Sean Eric Fagan * Copyright (c) 1994 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_DEPEND(coff, ibcs2, 1, 1, 1); extern struct sysentvec ibcs2_svr3_sysvec; static int coff_load_file __P((struct proc *p, char *name)); static int exec_coff_imgact __P((struct image_params *imgp)); static int load_coff_section __P((struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)); static int load_coff_section(struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot) { size_t map_len; vm_offset_t map_offset; vm_offset_t map_addr; int error; unsigned char *data_buf = 0; size_t copy_len; map_offset = trunc_page(offset); map_addr = trunc_page((vm_offset_t)vmaddr); if (memsz > filsz) { /* * We have the stupid situation that * the section is longer than it is on file, * which means it has zero-filled areas, and * we have to work for it. Stupid iBCS! */ map_len = trunc_page(offset + filsz) - trunc_page(map_offset); } else { /* * The only stuff we care about is on disk, and we * don't care if we map in more than is really there. */ map_len = round_page(offset + filsz) - trunc_page(map_offset); } DPRINTF(("%s(%d): vm_mmap(&vmspace->vm_map, &0x%08lx, 0x%x, 0x%x, " "VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, vp, 0x%x)\n", __FILE__, __LINE__, map_addr, map_len, prot, map_offset)); if ((error = vm_mmap(&vmspace->vm_map, &map_addr, map_len, prot, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, (caddr_t) vp, map_offset)) != 0) return error; if (memsz == filsz) { /* We're done! */ return 0; } /* * Now we have screwball stuff, to accomodate stupid COFF. * We have to map the remaining bit of the file into the kernel's * memory map, allocate some anonymous memory, copy that last * bit into it, and then we're done. *sigh* * For clean-up reasons, we actally map in the file last. 
*/ copy_len = (offset + filsz) - trunc_page(offset + filsz); map_addr = trunc_page((vm_offset_t)vmaddr + filsz); map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; DPRINTF(("%s(%d): vm_map_find(&vmspace->vm_map, NULL, 0, &0x%08lx,0x%x, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0)\n", __FILE__, __LINE__, map_addr, map_len)); if (map_len != 0) { error = vm_map_find(&vmspace->vm_map, NULL, 0, &map_addr, map_len, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return error; } if ((error = vm_mmap(kernel_map, (vm_offset_t *) &data_buf, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, (caddr_t) vp, trunc_page(offset + filsz))) != 0) return error; error = copyout(data_buf, (caddr_t) map_addr, copy_len); if (vm_map_remove(kernel_map, (vm_offset_t) data_buf, (vm_offset_t) data_buf + PAGE_SIZE)) panic("load_coff_section vm_map_remove failed"); return error; } static int coff_load_file(struct proc *p, char *name) { struct vmspace *vmspace = p->p_vmspace; int error; struct nameidata nd; struct vnode *vp; struct vattr attr; - struct ucred *uc; struct filehdr *fhdr; struct aouthdr *ahdr; struct scnhdr *scns; char *ptr = 0; int nscns; unsigned long text_offset = 0, text_address = 0, text_size = 0; unsigned long data_offset = 0, data_address = 0, data_size = 0; unsigned long bss_size = 0; int i; NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, name, p); error = namei(&nd); if (error) return error; vp = nd.ni_vp; if (vp == NULL) return ENOEXEC; if (vp->v_writecount) { error = ETXTBSY; goto fail; } - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - if ((error = VOP_GETATTR(vp, &attr, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_GETATTR(vp, &attr, p->p_ucred, p)) != 0) goto fail; - } if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) - || (attr.va_type != VREG)) { - crfree(uc); + || (attr.va_type != VREG)) goto fail; - } if (attr.va_size == 0) { error = ENOEXEC; - crfree(uc); goto fail; } - if ((error = VOP_ACCESS(vp, VEXEC, uc, 
p)) != 0) { - crfree(uc); + if ((error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) != 0) goto fail; - } - if ((error = VOP_OPEN(vp, FREAD, uc, p)) != 0) { - crfree(uc); + if ((error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) != 0) goto fail; - } - crfree(uc); /* * Lose the lock on the vnode. It's no longer needed, and must not * exist for the pagefault paging to work below. */ VOP_UNLOCK(vp, 0, p); if ((error = vm_mmap(kernel_map, (vm_offset_t *) &ptr, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, (caddr_t) vp, 0)) != 0) goto unlocked_fail; fhdr = (struct filehdr *)ptr; if (fhdr->f_magic != I386_COFF) { error = ENOEXEC; goto dealloc_and_fail; } nscns = fhdr->f_nscns; if ((nscns * sizeof(struct scnhdr)) > PAGE_SIZE) { /* * XXX -- just fail. I'm so lazy. */ error = ENOEXEC; goto dealloc_and_fail; } ahdr = (struct aouthdr*)(ptr + sizeof(struct filehdr)); scns = (struct scnhdr*)(ptr + sizeof(struct filehdr) + sizeof(struct aouthdr)); for (i = 0; i < nscns; i++) { if (scns[i].s_flags & STYP_NOLOAD) continue; else if (scns[i].s_flags & STYP_TEXT) { text_address = scns[i].s_vaddr; text_size = scns[i].s_size; text_offset = scns[i].s_scnptr; } else if (scns[i].s_flags & STYP_DATA) { data_address = scns[i].s_vaddr; data_size = scns[i].s_size; data_offset = scns[i].s_scnptr; } else if (scns[i].s_flags & STYP_BSS) { bss_size = scns[i].s_size; } } if ((error = load_coff_section(vmspace, vp, text_offset, (caddr_t)(void *)(uintptr_t)text_address, text_size, text_size, VM_PROT_READ | VM_PROT_EXECUTE)) != 0) { goto dealloc_and_fail; } if ((error = load_coff_section(vmspace, vp, data_offset, (caddr_t)(void *)(uintptr_t)data_address, data_size + bss_size, data_size, VM_PROT_ALL)) != 0) { goto dealloc_and_fail; } error = 0; dealloc_and_fail: if (vm_map_remove(kernel_map, (vm_offset_t) ptr, (vm_offset_t) ptr + PAGE_SIZE)) panic(__FUNCTION__ " vm_map_remove failed"); fail: VOP_UNLOCK(vp, 0, p); unlocked_fail: NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); return error; } static int 
exec_coff_imgact(imgp) struct image_params *imgp; { const struct filehdr *fhdr = (const struct filehdr*)imgp->image_header; const struct aouthdr *ahdr; const struct scnhdr *scns; int i; struct vmspace *vmspace; int nscns; int error; unsigned long text_offset = 0, text_address = 0, text_size = 0; unsigned long data_offset = 0, data_address = 0, data_size = 0; unsigned long bss_size = 0; caddr_t hole; if (fhdr->f_magic != I386_COFF || !(fhdr->f_flags & F_EXEC)) { DPRINTF(("%s(%d): return -1\n", __FILE__, __LINE__)); return -1; } nscns = fhdr->f_nscns; if ((nscns * sizeof(struct scnhdr)) > PAGE_SIZE) { /* * For now, return an error -- need to be able to * read in all of the section structures. */ DPRINTF(("%s(%d): return -1\n", __FILE__, __LINE__)); return -1; } ahdr = (const struct aouthdr*) ((const char*)(imgp->image_header) + sizeof(struct filehdr)); imgp->entry_addr = ahdr->entry; scns = (const struct scnhdr*) ((const char*)(imgp->image_header) + sizeof(struct filehdr) + sizeof(struct aouthdr)); if ((error = exec_extract_strings(imgp)) != 0) { DPRINTF(("%s(%d): return %d\n", __FILE__, __LINE__, error)); return error; } exec_new_vmspace(imgp); vmspace = imgp->proc->p_vmspace; for (i = 0; i < nscns; i++) { DPRINTF(("i = %d, scns[i].s_name = %s, scns[i].s_vaddr = %08lx, " "scns[i].s_scnptr = %d\n", i, scns[i].s_name, scns[i].s_vaddr, scns[i].s_scnptr)); if (scns[i].s_flags & STYP_NOLOAD) { /* * A section that is not loaded, for whatever * reason. It takes precedance over other flag * bits... 
*/ continue; } else if (scns[i].s_flags & STYP_TEXT) { text_address = scns[i].s_vaddr; text_size = scns[i].s_size; text_offset = scns[i].s_scnptr; } else if (scns[i].s_flags & STYP_DATA) { /* .data section */ data_address = scns[i].s_vaddr; data_size = scns[i].s_size; data_offset = scns[i].s_scnptr; } else if (scns[i].s_flags & STYP_BSS) { /* .bss section */ bss_size = scns[i].s_size; } else if (scns[i].s_flags & STYP_LIB) { char *buf = 0; int foff = trunc_page(scns[i].s_scnptr); int off = scns[i].s_scnptr - foff; int len = round_page(scns[i].s_size + PAGE_SIZE); int j; if ((error = vm_mmap(kernel_map, (vm_offset_t *) &buf, len, VM_PROT_READ, VM_PROT_READ, 0, (caddr_t) imgp->vp, foff)) != 0) { return ENOEXEC; } if(scns[i].s_size) { char *libbuf; int emul_path_len = strlen(ibcs2_emul_path); libbuf = malloc(MAXPATHLEN + emul_path_len, M_TEMP, M_WAITOK); strcpy(libbuf, ibcs2_emul_path); for (j = off; j < scns[i].s_size + off; j++) { char *libname; libname = buf + j + 4 * *(long*)(buf + j + 4); j += 4* *(long*)(buf + j); DPRINTF(("%s(%d): shared library %s\n", __FILE__, __LINE__, libname)); strcpy(&libbuf[emul_path_len], libname); error = coff_load_file(imgp->proc, libbuf); if (error) error = coff_load_file(imgp->proc, libname); if (error) break; } free(libbuf, M_TEMP); } if (vm_map_remove(kernel_map, (vm_offset_t) buf, (vm_offset_t) buf + len)) panic("exec_coff_imgact vm_map_remove failed"); if (error) return error; } } /* * Map in .text now */ DPRINTF(("%s(%d): load_coff_section(vmspace, " "imgp->vp, %08lx, %08lx, 0x%x, 0x%x, 0x%x)\n", __FILE__, __LINE__, text_offset, text_address, text_size, text_size, VM_PROT_READ | VM_PROT_EXECUTE)); if ((error = load_coff_section(vmspace, imgp->vp, text_offset, (caddr_t)(void *)(uintptr_t)text_address, text_size, text_size, VM_PROT_READ | VM_PROT_EXECUTE)) != 0) { DPRINTF(("%s(%d): error = %d\n", __FILE__, __LINE__, error)); return error; } /* * Map in .data and .bss now */ DPRINTF(("%s(%d): load_coff_section(vmspace, " 
"imgp->vp, 0x%08lx, 0x%08lx, 0x%x, 0x%x, 0x%x)\n", __FILE__, __LINE__, data_offset, data_address, data_size + bss_size, data_size, VM_PROT_ALL)); if ((error = load_coff_section(vmspace, imgp->vp, data_offset, (caddr_t)(void *)(uintptr_t)data_address, data_size + bss_size, data_size, VM_PROT_ALL)) != 0) { DPRINTF(("%s(%d): error = %d\n", __FILE__, __LINE__, error)); return error; } imgp->interpreted = 0; imgp->proc->p_sysent = &ibcs2_svr3_sysvec; vmspace->vm_tsize = round_page(text_size) >> PAGE_SHIFT; vmspace->vm_dsize = round_page(data_size + bss_size) >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(void *)(uintptr_t)text_address; vmspace->vm_daddr = (caddr_t)(void *)(uintptr_t)data_address; hole = (caddr_t)trunc_page((vm_offset_t)vmspace->vm_daddr) + ctob(vmspace->vm_dsize); DPRINTF(("%s(%d): vm_map_find(&vmspace->vm_map, NULL, 0, &0x%08lx, PAGE_SIZE, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0)\n", __FILE__, __LINE__, hole)); DPRINTF(("imgact: error = %d\n", error)); error = vm_map_find(&vmspace->vm_map, NULL, 0, (vm_offset_t *) &hole, PAGE_SIZE, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); DPRINTF(("IBCS2: start vm_dsize = 0x%x, vm_daddr = 0x%x end = 0x%x\n", ctob(vmspace->vm_dsize), vmspace->vm_daddr, ctob(vmspace->vm_dsize) + vmspace->vm_daddr )); DPRINTF(("%s(%d): returning successfully!\n", __FILE__, __LINE__)); /* Indicate that this file should not be modified */ imgp->vp->v_flag |= VTEXT; return 0; } /* * Tell kern_execve.c about it, with a little help from the linker. */ static struct execsw coff_execsw = { exec_coff_imgact, "coff" }; EXEC_SET(coff, coff_execsw); Index: head/sys/isofs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/isofs/cd9660/cd9660_vfsops.c (revision 71698) +++ head/sys/isofs/cd9660/cd9660_vfsops.c (revision 71699) @@ -1,914 +1,901 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct vnode **)); static int cd9660_checkexp __P((struct mount *, struct sockaddr *, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, vfs_stdstart, cd9660_unmount, cd9660_root, vfs_stdquotactl, cd9660_statfs, vfs_stdsync, cd9660_vget, cd9660_fhtovp, cd9660_checkexp, cd9660_vptofh, cd9660_init, cd9660_uninit, vfs_stdextattrctl, }; VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. */ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. 
*/
static int
iso_get_ssector(dev, p)
	dev_t dev;
	struct proc *p;
{
	struct ioc_toc_header h;
	struct ioc_read_toc_single_entry t;
	int i;
	struct cdevsw *bd;
	d_ioctl_t *ioctlp;

	/* The driver must provide an ioctl entry point to query the TOC. */
	bd = devsw(dev);
	ioctlp = bd->d_ioctl;
	if (ioctlp == NULL)
		return 0;

	if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) != 0)
		return 0;

	/* Scan the table of contents backwards for the last data track. */
	for (i = h.ending_track; i >= 0; i--) {
		t.address_format = CD_LBA_FORMAT;
		t.track = i;
		if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) != 0)
			return 0;
		if ((t.entry.control & 4) != 0)
			/* found a data track */
			break;
	}
	if (i < 0)
		return 0;

	return ntohl(t.entry.addr.lba);
}

static int iso_mountroot __P((struct mount *mp, struct proc *p));

/*
 * Mount rootdev as an ISO 9660 root file system, using the last
 * session located by iso_get_ssector() (0, i.e. the first session,
 * if none could be found).
 */
static int
iso_mountroot(mp, p)
	struct mount *mp;
	struct proc *p;
{
	struct iso_args args;
	int error;

	if ((error = bdevvp(rootdev, &rootvp))) {
		printf("iso_mountroot: can't find rootvp\n");
		return (error);
	}
	args.flags = ISOFSMNT_ROOT;
	args.ssector = iso_get_ssector(rootdev, p);
	if (bootverbose)
		printf("iso_mountroot(): using session at block %d\n",
		    args.ssector);
	if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0)
		return (error);

	(void)cd9660_statfs(mp, &mp->mnt_stat, p);
	return (0);
}

/*
 * VFS Operations.
 *
 * mount system call
 */
static int
cd9660_mount(mp, path, data, ndp, p)
	register struct mount *mp;
	char *path;
	caddr_t data;
	struct nameidata *ndp;
	struct proc *p;
{
	struct vnode *devvp;
	struct iso_args args;
-	struct ucred *uc;
	size_t size;
	int error;
	mode_t accessmode;
	struct iso_mnt *imp = 0;

	if ((mp->mnt_flag & MNT_ROOTFS) != 0) {
		return (iso_mountroot(mp, p));
	}
	if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args))))
		return (error);

	/* cd9660 is a read-only file system; refuse read/write mounts. */
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EROFS);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		imp = VFSTOISOFS(mp);
		if (args.fspec == 0)
			return (vfs_export(mp, &imp->im_export, &args.export));
	}
	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
	if ((error = namei(ndp)))
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);
	devvp = ndp->ni_vp;

	if (!vn_isdisk(devvp, &error)) {
		vrele(devvp);
		return (error);
	}

	/*
	 * Verify that user has necessary permissions on the device,
	 * or has superuser abilities
	 */
	accessmode = VREAD;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
-	PROC_LOCK(p);
-	uc = p->p_ucred;
-	crhold(uc);
-	PROC_UNLOCK(p);
-	error = VOP_ACCESS(devvp, accessmode, uc, p);
-	crfree(uc);
+	error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
	if (error)
		error = suser(p);
	if (error) {
		vput(devvp);
		return (error);
	}
	VOP_UNLOCK(devvp, 0, p);

	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		error = iso_mountfs(devvp, mp, p, &args);
	} else {
		/* Update: the device may not change. */
		if (devvp != imp->im_devvp)
			error = EINVAL;	/* needs translation */
		else
			vrele(devvp);
	}
	if (error) {
		vrele(devvp);
		return error;
	}
	imp = VFSTOISOFS(mp);

	/* Record the mount-point and device names for statfs(2). */
	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
	    &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
	return 0;
}

/*
 * Common code for mount and mountroot
 */
static int
iso_mountfs(devvp, mp, p, argp)
	register struct vnode *devvp;
	struct mount *mp;
	struct proc *p;
	struct iso_args *argp;
{
	register struct iso_mnt *isomp = (struct iso_mnt *)0;
	struct buf *bp = NULL;
	struct buf *pribp = NULL, *supbp = NULL;
-	struct ucred *uc;
	dev_t dev = devvp->v_rdev;
	int error = EINVAL;
	int needclose = 0;
	int high_sierra = 0;
	int iso_bsize;
	int iso_blknum;
	int joliet_level;
	struct iso_volume_descriptor *vdp = 0;
	struct iso_primary_descriptor *pri = NULL;
	struct iso_sierra_primary_descriptor *pri_sierra = NULL;
	struct iso_supplementary_descriptor *sup = NULL;
	struct iso_directory_record *rootp;
	int logical_block_size;

	if (!(mp->mnt_flag & MNT_RDONLY))
		return EROFS;

	/*
	 * Disallow multiple mounts of the same device.
	 * Disallow mounting of a device that is currently in use
	 * (except for root, which might share swap device for miniroot).
	 * Flush out any old buffers remaining from a previous use.
	 */
	if ((error = vfs_mountedon(devvp)))
		return error;
	if (vcount(devvp) > 1 && devvp != rootvp)
		return EBUSY;
-	PROC_LOCK(p);
-	uc = p->p_ucred;
-	crhold(uc);
-	PROC_UNLOCK(p);
-	error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0);
-	crfree(uc);
-	if (error)
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)))
		return (error);

	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_OPEN(devvp, FREAD, FSCRED, p);
	VOP_UNLOCK(devvp, 0, p);
	if (error)
		return error;
	/* Device is open from here on; the "out:" path must close it. */
	needclose = 1;

	/* This is the "logical sector size".  The standard says this
	 * should be 2048 or the physical sector size on the device,
	 * whichever is greater.  For now, we'll just use a constant.
	 */
	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;

	joliet_level = 0;
	/*
	 * Volume descriptors start at sector 16 of the session; scan
	 * until an ISO_VD_END terminator (or give up after sector 99).
	 * The primary (pribp) and supplementary (supbp) descriptor
	 * buffers are held across the loop for use below.
	 */
	for (iso_blknum = 16 + argp->ssector;
	     iso_blknum < 100 + argp->ssector;
	     iso_blknum++) {
		if ((error = bread(devvp, iso_blknum * btodb(iso_bsize),
				   iso_bsize, NOCRED, &bp)) != 0)
			goto out;

		vdp = (struct iso_volume_descriptor *)bp->b_data;
		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
			/*
			 * NOTE(review): this compares sizeof vdp->id (not
			 * sizeof vdp->id_sierra) bytes -- presumably the two
			 * fields have the same width; confirm against the
			 * iso_volume_descriptor declaration.
			 */
			if (bcmp (vdp->id_sierra, ISO_SIERRA_ID,
				  sizeof vdp->id) != 0) {
				error = EINVAL;
				goto out;
			} else
				high_sierra = 1;
		}

		switch (isonum_711 (high_sierra? vdp->type_sierra: vdp->type)){
		case ISO_VD_PRIMARY:
			if (pribp == NULL) {
				pribp = bp;
				bp = NULL;
				pri = (struct iso_primary_descriptor *)vdp;
				pri_sierra =
				    (struct iso_sierra_primary_descriptor *)vdp;
			}
			break;

		case ISO_VD_SUPPLEMENTARY:
			if (supbp == NULL) {
				supbp = bp;
				bp = NULL;
				sup = (struct iso_supplementary_descriptor *)vdp;

				if (!(argp->flags & ISOFSMNT_NOJOLIET)) {
					/*
					 * Joliet level from the UCS-2 escape
					 * sequence; a set bit 0 in the flags
					 * field disqualifies the descriptor.
					 */
					if (bcmp(sup->escape, "%/@", 3) == 0)
						joliet_level = 1;
					if (bcmp(sup->escape, "%/C", 3) == 0)
						joliet_level = 2;
					if (bcmp(sup->escape, "%/E", 3) == 0)
						joliet_level = 3;

					if (isonum_711 (sup->flags) & 1)
						joliet_level = 0;
				}
			}
			break;

		case ISO_VD_END:
			goto vd_end;

		default:
			break;
		}
		if (bp) {
			brelse(bp);
			bp = NULL;
		}
	}
 vd_end:
	if (bp) {
		brelse(bp);
		bp = NULL;
	}

	if (pri == NULL) {
		error = EINVAL;
		goto out;
	}

	logical_block_size =
	    isonum_723 (high_sierra?
			pri_sierra->logical_block_size:
			pri->logical_block_size);

	/* Block size must be a power of two within [DEV_BSIZE, MAXBSIZE]. */
	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
	    || (logical_block_size & (logical_block_size - 1)) != 0) {
		error = EINVAL;
		goto out;
	}

	rootp = (struct iso_directory_record *)
	    (high_sierra?
	     pri_sierra->root_directory_record:
	     pri->root_directory_record);

	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK | M_ZERO);
	isomp->logical_block_size = logical_block_size;
	isomp->volume_space_size =
	    isonum_733 (high_sierra?
			pri_sierra->volume_space_size:
			pri->volume_space_size);
	isomp->joliet_level = 0;
	/*
	 * Since an ISO9660 multi-session CD can also access previous
	 * sessions, we have to include them into the space consider-
	 * ations.  This doesn't yield a very accurate number since
	 * parts of the old sessions might be inaccessible now, but we
	 * can't do much better.  This is also important for the NFS
	 * filehandle validation.
	 */
	isomp->volume_space_size += argp->ssector;
	bcopy (rootp, isomp->root, sizeof isomp->root);
	isomp->root_extent = isonum_733 (rootp->extent);
	isomp->root_size = isonum_733 (rootp->size);

	isomp->im_bmask = logical_block_size - 1;
	isomp->im_bshift = ffs(logical_block_size) - 1;

	pribp->b_flags |= B_AGE;
	brelse(pribp);
	pribp = NULL;

	mp->mnt_data = (qaddr_t)isomp;
	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
	mp->mnt_maxsymlinklen = 0;
	mp->mnt_flag |= MNT_LOCAL;
	isomp->im_mountp = mp;
	isomp->im_dev = dev;
	isomp->im_devvp = devvp;

	devvp->v_rdev->si_mountpoint = mp;

	/* Check the Rock Ridge Extention support */
	if (!(argp->flags & ISOFSMNT_NORRIP)) {
		if ((error = bread(isomp->im_devvp,
				   (isomp->root_extent + isonum_711(rootp->ext_attr_length)) <<
				   (isomp->im_bshift - DEV_BSHIFT),
				   isomp->logical_block_size, NOCRED, &bp)) != 0)
			goto out;

		rootp = (struct iso_directory_record *)bp->b_data;

		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
			/* No usable RRIP data; fall back to plain 9660. */
			argp->flags |= ISOFSMNT_NORRIP;
		} else {
			argp->flags &= ~ISOFSMNT_GENS;
		}

		/*
		 * The contents are valid,
		 * but they will get reread as part of another vnode, so...
		 */
		bp->b_flags |= B_AGE;
		brelse(bp);
		bp = NULL;
	}
	isomp->im_flags = argp->flags & (ISOFSMNT_NORRIP | ISOFSMNT_GENS |
					 ISOFSMNT_EXTATT | ISOFSMNT_NOJOLIET);

	if (high_sierra) {
		/* this effectively ignores all the mount flags */
		log(LOG_INFO, "cd9660: High Sierra Format\n");
		isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA;
	} else
		switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
		default:
			isomp->iso_ftype = ISO_FTYPE_DEFAULT;
			break;
		case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
			isomp->iso_ftype = ISO_FTYPE_9660;
			break;
		case 0:
			log(LOG_INFO, "cd9660: RockRidge Extension\n");
			isomp->iso_ftype = ISO_FTYPE_RRIP;
			break;
		}

	/* Decide whether to use the Joliet descriptor */

	if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) {
		log(LOG_INFO, "cd9660: Joliet Extension\n");
		rootp = (struct iso_directory_record *)
		    sup->root_directory_record;
		bcopy (rootp, isomp->root, sizeof isomp->root);
		isomp->root_extent = isonum_733 (rootp->extent);
		isomp->root_size = isonum_733 (rootp->size);
		isomp->joliet_level = joliet_level;
		supbp->b_flags |= B_AGE;
	}

	if (supbp) {
		brelse(supbp);
		supbp = NULL;
	}

	return 0;
out:
	/* Error unwind: release buffers, close the device, free the mount. */
	devvp->v_rdev->si_mountpoint = NULL;
	if (bp)
		brelse(bp);
	if (pribp)
		brelse(pribp);
	if (supbp)
		brelse(supbp);
	if (needclose)
		(void)VOP_CLOSE(devvp, FREAD, NOCRED, p);
	if (isomp) {
		free((caddr_t)isomp, M_ISOFSMNT);
		mp->mnt_data = (qaddr_t)0;
	}
	return error;
}

/*
 * unmount system call
 */
static int
cd9660_unmount(mp, mntflags, p)
	struct mount *mp;
	int mntflags;
	struct proc *p;
{
	register struct iso_mnt *isomp;
	int error, flags = 0;

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
#if 0
	mntflushbuf(mp, 0);
	if (mntinvalbuf(mp))
		return EBUSY;
#endif
	if ((error = vflush(mp, NULLVP, flags)))
		return (error);

	isomp = VFSTOISOFS(mp);

	isomp->im_devvp->v_rdev->si_mountpoint = NULL;
	error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p);
	vrele(isomp->im_devvp);
	free((caddr_t)isomp, M_ISOFSMNT);
	mp->mnt_data = (qaddr_t)0;
	mp->mnt_flag &= ~MNT_LOCAL;
	return (error);
}

/*
 * Return root of a filesystem
 */
static int
cd9660_root(mp, vpp)
	struct mount *mp;
	struct vnode **vpp;
{
	struct iso_mnt *imp = VFSTOISOFS(mp);
	struct iso_directory_record *dp =
	    (struct iso_directory_record *)imp->root;
	ino_t ino = isodirino(dp, imp);

	/*
	 * With RRIP we must use the `.' entry of the root directory.
	 * Simply tell vget, that it's a relocated directory.
	 */
	return (cd9660_vget_internal(mp, ino, vpp,
	    imp->iso_ftype == ISO_FTYPE_RRIP, dp));
}

/*
 * Get file system statistics.
 */
int
cd9660_statfs(mp, sbp, p)
	struct mount *mp;
	register struct statfs *sbp;
	struct proc *p;
{
	register struct iso_mnt *isomp;

	isomp = VFSTOISOFS(mp);

	sbp->f_bsize = isomp->logical_block_size;
	sbp->f_iosize = sbp->f_bsize;	/* XXX */
	sbp->f_blocks = isomp->volume_space_size;
	/* Read-only medium: there is nothing free to report. */
	sbp->f_bfree = 0; /* total free blocks */
	sbp->f_bavail = 0; /* blocks free for non superuser */
	sbp->f_files = 0; /* total files */
	sbp->f_ffree = 0; /* free file nodes */
	if (sbp != &mp->mnt_stat) {
		sbp->f_type = mp->mnt_vfc->vfc_typenum;
		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
	}
	return 0;
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is in range
 * - call iget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the generation number matches
 */

struct ifid {
	ushort	ifid_len;
	ushort	ifid_pad;
	int	ifid_ino;
	long	ifid_start;
};

/* ARGSUSED */
int
cd9660_fhtovp(mp, fhp, vpp)
	register struct mount *mp;
	struct fid *fhp;
	struct vnode **vpp;
{
	struct ifid *ifhp = (struct ifid *)fhp;
	register struct iso_node *ip;
	struct vnode *nvp;
	int error;

#ifdef ISOFS_DBG
	printf("fhtovp: ino %d, start %ld\n",
	    ifhp->ifid_ino, ifhp->ifid_start);
#endif

	if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) {
		*vpp = NULLVP;
		return (error);
	}
	ip = VTOI(nvp);
	/* An unallocated inode means the handle is stale. */
	if (ip->inode.iso_mode == 0) {
		vput(nvp);
		*vpp = NULLVP;
		return (ESTALE);
	}
	*vpp = nvp;
	return (0);
}

int
cd9660_checkexp(mp, nam, exflagsp, credanonp)
	struct mount *mp;
	struct sockaddr *nam;
	int *exflagsp;
	struct ucred **credanonp;
{
	register struct netcred *np;
	register struct iso_mnt *imp;

	imp = VFSTOISOFS(mp);

	/*
	 * Get the export permission structure for this tuple.
	 */
	np = vfs_export_lookup(mp, &imp->im_export, nam);
	if (np == NULL)
		return (EACCES);

	*exflagsp = np->netc_exflags;
	*credanonp = &np->netc_anon;
	return (0);
}

int
cd9660_vget(mp, ino, vpp)
	struct mount *mp;
	ino_t ino;
	struct vnode **vpp;
{

	/*
	 * XXXX
	 * It would be nice if we didn't always set the `relocated' flag
	 * and force the extra read, but I don't want to think about fixing
	 * that right now.
	 */
	return (cd9660_vget_internal(mp, ino, vpp,
#if 0
	    VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP,
#else
	    0,
#endif
	    (struct iso_directory_record *)0));
}

int
cd9660_vget_internal(mp, ino, vpp, relocated, isodir)
	struct mount *mp;
	ino_t ino;
	struct vnode **vpp;
	int relocated;
	struct iso_directory_record *isodir;
{
	struct iso_mnt *imp;
	struct iso_node *ip;
	struct buf *bp;
	struct vnode *vp;
	dev_t dev;
	int error;

	imp = VFSTOISOFS(mp);
	dev = imp->im_dev;
	/* Return a cached vnode if the inode is already in the hash. */
	if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP)
		return (0);

	/* Allocate a new vnode/iso_node. */
	if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) {
		*vpp = NULLVP;
		return (error);
	}
	MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE,
	    M_WAITOK | M_ZERO);
	lockinit(&vp->v_lock, PINOD, "isonode", 0, 0);
	/*
	 * ISOFS uses stdlock and can share lock structure
	 */
	vp->v_vnlock = &vp->v_lock;
	vp->v_data = ip;
	ip->i_vnode = vp;
	ip->i_dev = dev;
	ip->i_number = ino;

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	cd9660_ihashins(ip);

	if (isodir == 0) {
		/* No caller-supplied record: read it from the device. */
		int lbn, off;

		lbn = lblkno(imp, ino);
		if (lbn >= imp->volume_space_size) {
			vput(vp);
			printf("fhtovp: lbn exceed volume space %d\n", lbn);
			return (ESTALE);
		}

		off = blkoff(imp, ino);
		if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
			vput(vp);
			printf("fhtovp: crosses block boundary %d\n",
			    off + ISO_DIRECTORY_RECORD_SIZE);
			return (ESTALE);
		}

		error = bread(imp->im_devvp,
			      lbn << (imp->im_bshift - DEV_BSHIFT),
			      imp->logical_block_size, NOCRED, &bp);
		if (error) {
			vput(vp);
			brelse(bp);
			printf("fhtovp: bread error %d\n",error);
			return (error);
		}
		isodir = (struct iso_directory_record *)(bp->b_data + off);

		if (off + isonum_711(isodir->length) >
		    imp->logical_block_size) {
			vput(vp);
			if (bp != 0)
				brelse(bp);
			printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
			       off +isonum_711(isodir->length), off,
			       isonum_711(isodir->length));
			return (ESTALE);
		}

#if 0
		if (isonum_733(isodir->extent) +
		    isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) {
			if (bp != 0)
				brelse(bp);
			printf("fhtovp: file start miss %d vs %d\n",
			       isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length),
			       ifhp->ifid_start);
			return (ESTALE);
		}
#endif
	} else
		bp = 0;

	ip->i_mnt = imp;
	ip->i_devvp = imp->im_devvp;
	VREF(ip->i_devvp);

	if (relocated) {
		/*
		 * On relocated directories we must
		 * read the `.' entry out of a dir.
		 */
		ip->iso_start = ino >> imp->im_bshift;
		if (bp != 0)
			brelse(bp);
		if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) {
			vput(vp);
			return (error);
		}
		isodir = (struct iso_directory_record *)bp->b_data;
	}

	ip->iso_extent = isonum_733(isodir->extent);
	ip->i_size = isonum_733(isodir->size);
	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;

	/*
	 * Setup time stamp, attribute
	 */
	vp->v_type = VNON;
	switch (imp->iso_ftype) {
	default:	/* ISO_FTYPE_9660 */
	    {
		struct buf *bp2;
		int off;
		if ((imp->im_flags & ISOFSMNT_EXTATT)
		    && (off = isonum_711(isodir->ext_attr_length)))
			cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift),
			    NULL, &bp2);
		else
			bp2 = NULL;
		cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660);
		cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660);
		if (bp2)
			brelse(bp2);
		break;
	    }
	case ISO_FTYPE_RRIP:
		cd9660_rrip_analyze(isodir, ip, imp);
		break;
	}

	if (bp != 0)
		brelse(bp);

	/*
	 * Initialize the associated vnode
	 */
	switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) {
	case VFIFO:
		vp->v_op = cd9660_fifoop_p;
		break;
	case VCHR:
	case VBLK:
		vp->v_op = cd9660_specop_p;
		vp = addaliasu(vp, ip->inode.iso_rdev);
		ip->i_vnode = vp;
		break;
	default:
		break;
	}

	if (ip->iso_extent == imp->root_extent)
		vp->v_flag |= VROOT;

	/*
	 * XXX need generation number?
	 */

	*vpp = vp;
	return (0);
}

/*
 * Vnode pointer to File handle
 */
/* ARGSUSED */
int
cd9660_vptofh(vp, fhp)
	struct vnode *vp;
	struct fid *fhp;
{
	register struct iso_node *ip = VTOI(vp);
	register struct ifid *ifhp;

	ifhp = (struct ifid *)fhp;
	ifhp->ifid_len = sizeof(struct ifid);

	ifhp->ifid_ino = ip->i_number;
	ifhp->ifid_start = ip->iso_start;

#ifdef ISOFS_DBG
	printf("vptofh: ino %d, start %ld\n",
	    ifhp->ifid_ino,ifhp->ifid_start);
#endif
	return 0;
}
Index: head/sys/kern/imgact_aout.c
===================================================================
--- head/sys/kern/imgact_aout.c	(revision 71698)
+++ head/sys/kern/imgact_aout.c	(revision 71699)
@@ -1,282 +1,277 @@
/*
 * Copyright (c) 1993, David Greenman
 * All rights reserved.
*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * NOTE(review): the header names of the #include directives below were
 * lost when this listing was extracted (the angle-bracketed tokens were
 * stripped); recover them from imgact_aout.c r71699 before compiling.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static int	exec_aout_imgact __P((struct image_params *imgp));

/* System-call dispatch/signal description for native a.out binaries. */
struct sysentvec aout_sysvec = {
	SYS_MAXSYSCALL,
	sysent,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	sendsig,
	sigcode,
	&szsigcode,
	0,
	"FreeBSD a.out",
	aout_coredump,
	NULL,
	MINSIGSTKSZ
};

static int
exec_aout_imgact(imgp)
	struct image_params *imgp;
{
	const struct exec *a_out = (const struct exec *) imgp->image_header;
	struct vmspace *vmspace;
	struct vnode *vp;
	vm_map_t map;
	vm_object_t object;
	vm_offset_t text_end, data_end;
	unsigned long virtual_offset;
	unsigned long file_offset;
	unsigned long bss_size;
	int error;

	/*
	 * Linux and *BSD binaries look very much alike,
	 * only the machine id is different:
	 * 0x64 for Linux, 0x86 for *BSD, 0x00 for BSDI.
	 * NetBSD is in network byte order.. ugh.
	 */
	if (((a_out->a_magic >> 16) & 0xff) != 0x86 &&
	    ((a_out->a_magic >> 16) & 0xff) != 0 &&
	    ((((int)ntohl(a_out->a_magic)) >> 16) & 0xff) != 0x86)
		return -1;

	/*
	 * Set file/virtual offset based on a.out variant.
	 * We do two cases: host byte order and network byte order
	 * (for NetBSD compatibility)
	 */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case ZMAGIC:
		virtual_offset = 0;
		if (a_out->a_text) {
			file_offset = PAGE_SIZE;
		} else {
			/* Bill's "screwball mode" */
			file_offset = 0;
		}
		break;
	case QMAGIC:
		virtual_offset = PAGE_SIZE;
		file_offset = 0;
		/* Pass PS_STRINGS for BSD/OS binaries only. */
		if (N_GETMID(*a_out) == MID_ZERO)
			imgp->ps_strings = PS_STRINGS;
		break;
	default:
		/* NetBSD compatibility */
		switch ((int)(ntohl(a_out->a_magic) & 0xffff)) {
		case ZMAGIC:
		case QMAGIC:
			virtual_offset = PAGE_SIZE;
			file_offset = 0;
			break;
		default:
			return (-1);
		}
	}

	bss_size = roundup(a_out->a_bss, PAGE_SIZE);

	/*
	 * Check various fields in header for validity/bounds.
	 */
	if (/* entry point must lay with text region */
	    a_out->a_entry < virtual_offset ||
	    a_out->a_entry >= virtual_offset + a_out->a_text ||

	    /* text and data size must each be page rounded */
	    a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK)
		return (-1);

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > imgp->attr->va_size)
		return (EFAULT);

	/*
	 * text/data/bss must not exceed limits
	 */
	mtx_assert(&Giant, MA_OWNED);
	if (/* text can't exceed maximum text size */
	    a_out->a_text > MAXTSIZ ||

	    /* data + bss can't exceed rlimit */
	    a_out->a_data + bss_size >
	    imgp->proc->p_rlimit[RLIMIT_DATA].rlim_cur)
		return (ENOMEM);

	/* copy in arguments and/or environment from old process */
	error = exec_extract_strings(imgp);
	if (error)
		return (error);

	/*
	 * Destroy old process VM and create a new one (with a new stack)
	 */
	exec_new_vmspace(imgp);

	/*
	 * The vm space can be changed by exec_new_vmspace
	 */
	vmspace = imgp->proc->p_vmspace;

	vp = imgp->vp;
	map = &vmspace->vm_map;
	vm_map_lock(map);
	VOP_GETVOBJECT(vp, &object);
	vm_object_reference(object);

	/* Map text copy-on-write from the executable's VM object. */
	text_end = virtual_offset + a_out->a_text;
	error = vm_map_insert(map, object,
		file_offset,
		virtual_offset, text_end,
		VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_ALL,
		MAP_COPY_ON_WRITE | MAP_PREFAULT);
	if (error) {
		vm_map_unlock(map);
		return (error);
	}

	data_end = text_end + a_out->a_data;
	if (a_out->a_data) {
		vm_object_reference(object);
		error = vm_map_insert(map, object,
			file_offset + a_out->a_text,
			text_end, data_end,
			VM_PROT_ALL, VM_PROT_ALL,
			MAP_COPY_ON_WRITE | MAP_PREFAULT);
		if (error) {
			vm_map_unlock(map);
			return (error);
		}
	}

	if (bss_size) {
		/* bss is anonymous (zero-fill) memory after the data. */
		error = vm_map_insert(map, NULL, 0,
			data_end, data_end + bss_size,
			VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error) {
			vm_map_unlock(map);
			return (error);
		}
	}
	vm_map_unlock(map);

	/* Fill in process VM information */
	vmspace->vm_tsize = a_out->a_text >> PAGE_SHIFT;
	vmspace->vm_dsize = (a_out->a_data + bss_size) >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t) (uintptr_t) virtual_offset;
	vmspace->vm_daddr = (caddr_t) (uintptr_t)
	    (virtual_offset + a_out->a_text);

	/* Fill in image_params */
	imgp->interpreted = 0;
	imgp->entry_addr = a_out->a_entry;
	imgp->proc->p_sysent = &aout_sysvec;

	/* Indicate that this file should not be modified */
	imgp->vp->v_flag |= VTEXT;

	return (0);
}

/*
 * Dump core, into a file named as described in the comments for
 * expand_name(), unless the process was setuid/setgid.
 */
int
aout_coredump(p, vp, limit)
	register struct proc *p;
	register struct vnode *vp;
	off_t limit;
{
-	register struct ucred *cred;
+	register struct ucred *cred = p->p_ucred;
	register struct vmspace *vm = p->p_vmspace;
	int error;

	/* Refuse if u-area + data + stack exceeds the coredump limit. */
	if (ctob(UPAGES + vm->vm_dsize + vm->vm_ssize) >= limit)
		return (EFAULT);
	fill_kinfo_proc(p, &p->p_addr->u_kproc);
-	PROC_LOCK(p);
-	cred = p->p_ucred;
-	crhold(cred);
-	PROC_UNLOCK(p);
	error = cpu_coredump(p, vp, cred);
	if (error == 0)
		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
		    (int)ctob(vm->vm_dsize), (off_t)ctob(UPAGES), UIO_USERSPACE,
		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
	if (error == 0)
		error = vn_rdwr(UIO_WRITE, vp,
		    (caddr_t) trunc_page(USRSTACK - ctob(vm->vm_ssize)),
		    round_page(ctob(vm->vm_ssize)),
		    (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE,
		    IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p);
-	crfree(cred);
	return (error);
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
static struct execsw aout_execsw = { exec_aout_imgact, "a.out" };
EXEC_SET(aout, aout_execsw);
Index: head/sys/kern/imgact_elf.c
===================================================================
--- head/sys/kern/imgact_elf.c	(revision 71698)
+++ head/sys/kern/imgact_elf.c	(revision 71699)
@@ -1,1036 +1,1031 @@
/*-
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #include "opt_rlimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define OLD_EI_BRAND 8 __ElfType(Brandinfo); __ElfType(Auxargs); static int elf_check_header __P((const Elf_Ehdr *hdr)); static int elf_freebsd_fixup __P((register_t **stack_base, struct image_params *imgp)); static int elf_load_file __P((struct proc *p, const char *file, u_long *addr, u_long *entry)); static int elf_load_section __P((struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)); static int exec_elf_imgact __P((struct image_params *imgp)); static int elf_trace = 0; SYSCTL_INT(_debug, OID_AUTO, elf_trace, CTLFLAG_RW, &elf_trace, 0, ""); struct sysentvec elf_freebsd_sysvec = { SYS_MAXSYSCALL, sysent, 0, 0, 0, 0, 0, 0, elf_freebsd_fixup, sendsig, sigcode, &szsigcode, 0, "FreeBSD ELF", elf_coredump, NULL, MINSIGSTKSZ }; static Elf_Brandinfo freebsd_brand_info = { ELFOSABI_FREEBSD, "", "/usr/libexec/ld-elf.so.1", &elf_freebsd_sysvec }; static Elf_Brandinfo *elf_brand_list[MAX_BRANDS] = { &freebsd_brand_info, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; int elf_insert_brand_entry(Elf_Brandinfo *entry) { int i; for (i=1; ip_sysent == entry->sysvec) { rval = TRUE; break; } } ALLPROC_LOCK(AP_RELEASE); return (rval); } static int elf_check_header(const Elf_Ehdr *hdr) { if (!IS_ELF(*hdr) || hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || hdr->e_ident[EI_DATA] != ELF_TARG_DATA || hdr->e_ident[EI_VERSION] != EV_CURRENT) return ENOEXEC; if (!ELF_MACHINE_OK(hdr->e_machine)) return ENOEXEC; if (hdr->e_version != ELF_TARG_VER) return ENOEXEC; return 0; } static int elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_offset_t offset, caddr_t vmaddr, 
size_t memsz, size_t filsz, vm_prot_t prot) { size_t map_len; vm_offset_t map_addr; int error, rv; size_t copy_len; vm_object_t object; vm_offset_t file_addr; vm_offset_t data_buf = 0; VOP_GETVOBJECT(vp, &object); error = 0; /* * It's necessary to fail if the filsz + offset taken from the * header is greater than the actual file pager object's size. * If we were to allow this, then the vm_map_find() below would * walk right off the end of the file object and into the ether. * * While I'm here, might as well check for something else that * is invalid: filsz cannot be greater than memsz. */ if ((off_t)filsz + offset > object->un_pager.vnp.vnp_size || filsz > memsz) { uprintf("elf_load_section: truncated ELF file\n"); return (ENOEXEC); } map_addr = trunc_page((vm_offset_t)vmaddr); file_addr = trunc_page(offset); /* * We have two choices. We can either clear the data in the last page * of an oversized mapping, or we can start the anon mapping a page * early and copy the initialized data into that first page. We * choose the second.. */ if (memsz > filsz) map_len = trunc_page(offset+filsz) - file_addr; else map_len = round_page(offset+filsz) - file_addr; if (map_len != 0) { vm_object_reference(object); vm_map_lock(&vmspace->vm_map); rv = vm_map_insert(&vmspace->vm_map, object, file_addr, /* file offset */ map_addr, /* virtual start */ map_addr + map_len,/* virtual end */ prot, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT); vm_map_unlock(&vmspace->vm_map); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); return EINVAL; } /* we can stop now if we've covered it all */ if (memsz == filsz) return 0; } /* * We have to get the remaining bit of the file into the first part * of the oversized map segment. This is normally because the .data * segment in the file is extended to provide bss. It's a neat idea * to try and save a page, but it's a pain in the behind to implement. 
*/ copy_len = (offset + filsz) - trunc_page(offset + filsz); map_addr = trunc_page((vm_offset_t)vmaddr + filsz); map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; /* This had damn well better be true! */ if (map_len != 0) { vm_map_lock(&vmspace->vm_map); rv = vm_map_insert(&vmspace->vm_map, NULL, 0, map_addr, map_addr + map_len, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(&vmspace->vm_map); if (rv != KERN_SUCCESS) return EINVAL; } if (copy_len != 0) { vm_object_reference(object); rv = vm_map_find(exec_map, object, trunc_page(offset + filsz), &data_buf, PAGE_SIZE, TRUE, VM_PROT_READ, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL); if (rv != KERN_SUCCESS) { vm_object_deallocate(object); return EINVAL; } /* send the page fragment to user space */ error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len); vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE); if (error) return (error); } /* * set it to the specified protection */ vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot, FALSE); return error; } /* * Load the file "file" into memory. It may be either a shared object * or an executable. * * The "addr" reference parameter is in/out. On entry, it specifies * the address where a shared object should be loaded. If the file is * an executable, this value is ignored. On exit, "addr" specifies * where the file was actually loaded. * * The "entry" reference parameter is out only. On exit, it specifies * the entry point for the loaded file. 
*/ static int elf_load_file(struct proc *p, const char *file, u_long *addr, u_long *entry) { const Elf_Ehdr *hdr = NULL; const Elf_Phdr *phdr = NULL; struct nameidata nd; struct vmspace *vmspace = p->p_vmspace; struct vattr attr; struct image_params image_params, *imgp; vm_prot_t prot; u_long rbase; u_long base_addr = 0; int error, i, numsegs; imgp = &image_params; /* * Initialize part of the common data */ imgp->proc = p; imgp->uap = NULL; imgp->attr = &attr; imgp->firstpage = NULL; imgp->image_header = (char *)kmem_alloc_wait(exec_map, PAGE_SIZE); if (imgp->image_header == NULL) { nd.ni_vp = NULL; error = ENOMEM; goto fail; } NDINIT(&nd, LOOKUP, LOCKLEAF|FOLLOW, UIO_SYSSPACE, file, p); if ((error = namei(&nd)) != 0) { nd.ni_vp = NULL; goto fail; } NDFREE(&nd, NDF_ONLY_PNBUF); imgp->vp = nd.ni_vp; /* * Check permissions, modes, uid, etc on the file, and "open" it. */ error = exec_check_permissions(imgp); if (error) { VOP_UNLOCK(nd.ni_vp, 0, p); goto fail; } error = exec_map_first_page(imgp); /* * Also make certain that the interpreter stays the same, so set * its VTEXT flag, too. 
*/ if (error == 0) nd.ni_vp->v_flag |= VTEXT; VOP_UNLOCK(nd.ni_vp, 0, p); if (error) goto fail; hdr = (const Elf_Ehdr *)imgp->image_header; if ((error = elf_check_header(hdr)) != 0) goto fail; if (hdr->e_type == ET_DYN) rbase = *addr; else if (hdr->e_type == ET_EXEC) rbase = 0; else { error = ENOEXEC; goto fail; } /* Only support headers that fit within first page for now */ if ((hdr->e_phoff > PAGE_SIZE) || (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) { error = ENOEXEC; goto fail; } phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) { if (phdr[i].p_type == PT_LOAD) { /* Loadable segment */ prot = 0; if (phdr[i].p_flags & PF_X) prot |= VM_PROT_EXECUTE; if (phdr[i].p_flags & PF_W) prot |= VM_PROT_WRITE; if (phdr[i].p_flags & PF_R) prot |= VM_PROT_READ; if ((error = elf_load_section(p, vmspace, nd.ni_vp, phdr[i].p_offset, (caddr_t)phdr[i].p_vaddr + rbase, phdr[i].p_memsz, phdr[i].p_filesz, prot)) != 0) goto fail; /* * Establish the base address if this is the * first segment. 
*/ if (numsegs == 0) base_addr = trunc_page(phdr[i].p_vaddr + rbase); numsegs++; } } *addr = base_addr; *entry=(unsigned long)hdr->e_entry + rbase; fail: if (imgp->firstpage) exec_unmap_first_page(imgp); if (imgp->image_header) kmem_free_wakeup(exec_map, (vm_offset_t)imgp->image_header, PAGE_SIZE); if (nd.ni_vp) vrele(nd.ni_vp); return error; } static int fallback_elf_brand = ELFOSABI_FREEBSD; SYSCTL_INT(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW, &fallback_elf_brand, ELFOSABI_FREEBSD, "ELF brand of last resort"); static int exec_elf_imgact(struct image_params *imgp) { const Elf_Ehdr *hdr = (const Elf_Ehdr *) imgp->image_header; const Elf_Phdr *phdr; Elf_Auxargs *elf_auxargs = NULL; struct vmspace *vmspace; vm_prot_t prot; u_long text_size = 0, data_size = 0; u_long text_addr = 0, data_addr = 0; u_long addr, entry = 0, proghdr = 0; int error, i; const char *interp = NULL; Elf_Brandinfo *brand_info; char path[MAXPATHLEN]; /* * Do we have a valid ELF header ? */ if (elf_check_header(hdr) != 0 || hdr->e_type != ET_EXEC) return -1; /* * From here on down, we return an errno, not -1, as we've * detected an ELF file. */ if ((hdr->e_phoff > PAGE_SIZE) || (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) { /* Only support headers in first page for now */ return ENOEXEC; } phdr = (const Elf_Phdr*)(imgp->image_header + hdr->e_phoff); /* * From this point on, we may have resources that need to be freed. */ /* * Yeah, I'm paranoid. There is every reason in the world to get * VTEXT now since from here on out, there are places we can have * a context switch. Better safe than sorry; I really don't want * the file to change while it's being loaded. 
*/ mtx_enter(&imgp->vp->v_interlock, MTX_DEF); imgp->vp->v_flag |= VTEXT; mtx_exit(&imgp->vp->v_interlock, MTX_DEF); if ((error = exec_extract_strings(imgp)) != 0) goto fail; exec_new_vmspace(imgp); vmspace = imgp->proc->p_vmspace; for (i = 0; i < hdr->e_phnum; i++) { switch(phdr[i].p_type) { case PT_LOAD: /* Loadable segment */ prot = 0; if (phdr[i].p_flags & PF_X) prot |= VM_PROT_EXECUTE; if (phdr[i].p_flags & PF_W) prot |= VM_PROT_WRITE; if (phdr[i].p_flags & PF_R) prot |= VM_PROT_READ; if ((error = elf_load_section(imgp->proc, vmspace, imgp->vp, phdr[i].p_offset, (caddr_t)phdr[i].p_vaddr, phdr[i].p_memsz, phdr[i].p_filesz, prot)) != 0) goto fail; /* * Is this .text or .data ?? * * We only handle one each of those yet XXX */ if (hdr->e_entry >= phdr[i].p_vaddr && hdr->e_entry <(phdr[i].p_vaddr+phdr[i].p_memsz)) { text_addr = trunc_page(phdr[i].p_vaddr); text_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr - text_addr); entry = (u_long)hdr->e_entry; } else { data_addr = trunc_page(phdr[i].p_vaddr); data_size = round_page(phdr[i].p_memsz + phdr[i].p_vaddr - data_addr); } break; case PT_INTERP: /* Path to interpreter */ if (phdr[i].p_filesz > MAXPATHLEN || phdr[i].p_offset + phdr[i].p_filesz > PAGE_SIZE) { error = ENOEXEC; goto fail; } interp = imgp->image_header + phdr[i].p_offset; break; case PT_PHDR: /* Program header table info */ proghdr = phdr[i].p_vaddr; break; default: break; } } vmspace->vm_tsize = text_size >> PAGE_SHIFT; vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; vmspace->vm_dsize = data_size >> PAGE_SHIFT; vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; addr = ELF_RTLD_ADDR(vmspace); imgp->entry_addr = entry; brand_info = NULL; /* XXX For now we look for the magic "FreeBSD" that we used to put * into the ELF header at the EI_ABIVERSION location. If found use * that information rather than figuring out the ABI from proper * branding. This should be removed for 5.0-RELEASE. The Linux caes * can be figured out from the `interp_path' field. 
*/ if (strcmp("FreeBSD", (const char *)&hdr->e_ident[OLD_EI_BRAND]) == 0) brand_info = &freebsd_brand_info; /* If the executable has a brand, search for it in the brand list. */ if (brand_info == NULL) { for (i = 0; i < MAX_BRANDS; i++) { Elf_Brandinfo *bi = elf_brand_list[i]; if (bi != NULL && hdr->e_ident[EI_OSABI] == bi->brand) { brand_info = bi; break; } } } /* Lacking a known brand, search for a recognized interpreter. */ if (brand_info == NULL && interp != NULL) { for (i = 0; i < MAX_BRANDS; i++) { Elf_Brandinfo *bi = elf_brand_list[i]; if (bi != NULL && strcmp(interp, bi->interp_path) == 0) { brand_info = bi; break; } } } /* Lacking a recognized interpreter, try the default brand */ if (brand_info == NULL) { for (i = 0; i < MAX_BRANDS; i++) { Elf_Brandinfo *bi = elf_brand_list[i]; if (bi != NULL && fallback_elf_brand == bi->brand) { brand_info = bi; break; } } } /* XXX - Assume FreeBSD after the branding method change. */ if (brand_info == NULL) brand_info = &freebsd_brand_info; if (brand_info == NULL) { uprintf("ELF binary type \"%u\" not known.\n", hdr->e_ident[EI_OSABI]); error = ENOEXEC; goto fail; } imgp->proc->p_sysent = brand_info->sysvec; if (interp != NULL) { snprintf(path, sizeof(path), "%s%s", brand_info->emul_path, interp); if ((error = elf_load_file(imgp->proc, path, &addr, &imgp->entry_addr)) != 0) { if ((error = elf_load_file(imgp->proc, interp, &addr, &imgp->entry_addr)) != 0) { uprintf("ELF interpreter %s not found\n", path); goto fail; } } } /* * Construct auxargs table (used by the fixup routine) */ elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); elf_auxargs->execfd = -1; elf_auxargs->phdr = proghdr; elf_auxargs->phent = hdr->e_phentsize; elf_auxargs->phnum = hdr->e_phnum; elf_auxargs->pagesz = PAGE_SIZE; elf_auxargs->base = addr; elf_auxargs->flags = 0; elf_auxargs->entry = entry; elf_auxargs->trace = elf_trace; imgp->auxargs = elf_auxargs; imgp->interpreted = 0; fail: return error; } static int elf_freebsd_fixup(register_t 
**stack_base, struct image_params *imgp) { Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs; register_t *pos; pos = *stack_base + (imgp->argc + imgp->envc + 2); if (args->trace) { AUXARGS_ENTRY(pos, AT_DEBUG, 1); } if (args->execfd != -1) { AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); } AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; (*stack_base)--; suword(*stack_base, (long) imgp->argc); return 0; } /* * Code for generating ELF core dumps. */ typedef void (*segment_callback) __P((vm_map_entry_t, void *)); /* Closure for cb_put_phdr(). */ struct phdr_closure { Elf_Phdr *phdr; /* Program header to fill in */ Elf_Off offset; /* Offset of segment in core file */ }; /* Closure for cb_size_segment(). */ struct sseg_closure { int count; /* Count of writable segments. */ size_t size; /* Total size of all writable segments. */ }; static void cb_put_phdr __P((vm_map_entry_t, void *)); static void cb_size_segment __P((vm_map_entry_t, void *)); static void each_writable_segment __P((struct proc *, segment_callback, void *)); static int elf_corehdr __P((struct proc *, struct vnode *, struct ucred *, int, void *, size_t)); static void elf_puthdr __P((struct proc *, void *, size_t *, const prstatus_t *, const prfpregset_t *, const prpsinfo_t *, int)); static void elf_putnote __P((void *, size_t *, const char *, int, const void *, size_t)); extern int osreldate; int elf_coredump(p, vp, limit) register struct proc *p; register struct vnode *vp; off_t limit; { - register struct ucred *cred; + register struct ucred *cred = p->p_ucred; int error = 0; struct sseg_closure seginfo; void *hdr; size_t hdrsize; /* Size the program segments. 
*/ seginfo.count = 0; seginfo.size = 0; each_writable_segment(p, cb_size_segment, &seginfo); /* * Calculate the size of the core file header area by making * a dry run of generating it. Nothing is written, but the * size is calculated. */ hdrsize = 0; elf_puthdr((struct proc *)NULL, (void *)NULL, &hdrsize, (const prstatus_t *)NULL, (const prfpregset_t *)NULL, (const prpsinfo_t *)NULL, seginfo.count); if (hdrsize + seginfo.size >= limit) return (EFAULT); /* * Allocate memory for building the header, fill it up, * and write it out. */ hdr = malloc(hdrsize, M_TEMP, M_WAITOK); if (hdr == NULL) { return EINVAL; } - PROC_LOCK(p); - cred = p->p_ucred; - crhold(cred); - PROC_UNLOCK(p); error = elf_corehdr(p, vp, cred, seginfo.count, hdr, hdrsize); /* Write the contents of all of the writable segments. */ if (error == 0) { Elf_Phdr *php; off_t offset; int i; php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; offset = hdrsize; for (i = 0; i < seginfo.count; i++) { error = vn_rdwr(UIO_WRITE, vp, (caddr_t)php->p_vaddr, php->p_filesz, offset, UIO_USERSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL, p); if (error != 0) break; offset += php->p_filesz; php++; } } - crfree(cred); free(hdr, M_TEMP); return error; } /* * A callback for each_writable_segment() to write out the segment's * program header entry. 
*/ static void cb_put_phdr(entry, closure) vm_map_entry_t entry; void *closure; { struct phdr_closure *phc = (struct phdr_closure *)closure; Elf_Phdr *phdr = phc->phdr; phc->offset = round_page(phc->offset); phdr->p_type = PT_LOAD; phdr->p_offset = phc->offset; phdr->p_vaddr = entry->start; phdr->p_paddr = 0; phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; phdr->p_align = PAGE_SIZE; phdr->p_flags = 0; if (entry->protection & VM_PROT_READ) phdr->p_flags |= PF_R; if (entry->protection & VM_PROT_WRITE) phdr->p_flags |= PF_W; if (entry->protection & VM_PROT_EXECUTE) phdr->p_flags |= PF_X; phc->offset += phdr->p_filesz; phc->phdr++; } /* * A callback for each_writable_segment() to gather information about * the number of segments and their total size. */ static void cb_size_segment(entry, closure) vm_map_entry_t entry; void *closure; { struct sseg_closure *ssc = (struct sseg_closure *)closure; ssc->count++; ssc->size += entry->end - entry->start; } /* * For each writable segment in the process's memory map, call the given * function with a pointer to the map entry and some arbitrary * caller-supplied data. */ static void each_writable_segment(p, func, closure) struct proc *p; segment_callback func; void *closure; { vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; for (entry = map->header.next; entry != &map->header; entry = entry->next) { vm_object_t obj; if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) || (entry->protection & (VM_PROT_READ|VM_PROT_WRITE)) != (VM_PROT_READ|VM_PROT_WRITE)) continue; /* ** Dont include memory segment in the coredump if ** MAP_NOCORE is set in mmap(2) or MADV_NOCORE in ** madvise(2). */ if (entry->eflags & MAP_ENTRY_NOCOREDUMP) continue; if ((obj = entry->object.vm_object) == NULL) continue; /* Find the deepest backing object. */ while (obj->backing_object != NULL) obj = obj->backing_object; /* Ignore memory-mapped devices and such things. 
*/ if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP && obj->type != OBJT_VNODE) continue; (*func)(entry, closure); } } /* * Write the core file header to the file, including padding up to * the page boundary. */ static int elf_corehdr(p, vp, cred, numsegs, hdr, hdrsize) struct proc *p; struct vnode *vp; struct ucred *cred; int numsegs; size_t hdrsize; void *hdr; { size_t off; prstatus_t status; prfpregset_t fpregset; prpsinfo_t psinfo; /* Gather the information for the header. */ bzero(&status, sizeof status); status.pr_version = PRSTATUS_VERSION; status.pr_statussz = sizeof(prstatus_t); status.pr_gregsetsz = sizeof(gregset_t); status.pr_fpregsetsz = sizeof(fpregset_t); status.pr_osreldate = osreldate; status.pr_cursig = p->p_sig; status.pr_pid = p->p_pid; fill_regs(p, &status.pr_reg); fill_fpregs(p, &fpregset); bzero(&psinfo, sizeof psinfo); psinfo.pr_version = PRPSINFO_VERSION; psinfo.pr_psinfosz = sizeof(prpsinfo_t); strncpy(psinfo.pr_fname, p->p_comm, MAXCOMLEN); /* XXX - We don't fill in the command line arguments properly yet. */ strncpy(psinfo.pr_psargs, p->p_comm, PRARGSZ); /* Fill in the header. */ bzero(hdr, hdrsize); off = 0; elf_puthdr(p, hdr, &off, &status, &fpregset, &psinfo, numsegs); /* Write it to the core file. */ return vn_rdwr(UIO_WRITE, vp, hdr, hdrsize, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, NULL, p); } static void elf_puthdr(struct proc *p, void *dst, size_t *off, const prstatus_t *status, const prfpregset_t *fpregset, const prpsinfo_t *psinfo, int numsegs) { size_t ehoff; size_t phoff; size_t noteoff; size_t notesz; ehoff = *off; *off += sizeof(Elf_Ehdr); phoff = *off; *off += (numsegs + 1) * sizeof(Elf_Phdr); noteoff = *off; elf_putnote(dst, off, "FreeBSD", NT_PRSTATUS, status, sizeof *status); elf_putnote(dst, off, "FreeBSD", NT_FPREGSET, fpregset, sizeof *fpregset); elf_putnote(dst, off, "FreeBSD", NT_PRPSINFO, psinfo, sizeof *psinfo); notesz = *off - noteoff; /* Align up to a page boundary for the program segments. 
*/ *off = round_page(*off); if (dst != NULL) { Elf_Ehdr *ehdr; Elf_Phdr *phdr; struct phdr_closure phc; /* * Fill in the ELF header. */ ehdr = (Elf_Ehdr *)((char *)dst + ehoff); ehdr->e_ident[EI_MAG0] = ELFMAG0; ehdr->e_ident[EI_MAG1] = ELFMAG1; ehdr->e_ident[EI_MAG2] = ELFMAG2; ehdr->e_ident[EI_MAG3] = ELFMAG3; ehdr->e_ident[EI_CLASS] = ELF_CLASS; ehdr->e_ident[EI_DATA] = ELF_DATA; ehdr->e_ident[EI_VERSION] = EV_CURRENT; ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD; ehdr->e_ident[EI_ABIVERSION] = 0; ehdr->e_ident[EI_PAD] = 0; ehdr->e_type = ET_CORE; ehdr->e_machine = ELF_ARCH; ehdr->e_version = EV_CURRENT; ehdr->e_entry = 0; ehdr->e_phoff = phoff; ehdr->e_flags = 0; ehdr->e_ehsize = sizeof(Elf_Ehdr); ehdr->e_phentsize = sizeof(Elf_Phdr); ehdr->e_phnum = numsegs + 1; ehdr->e_shentsize = sizeof(Elf_Shdr); ehdr->e_shnum = 0; ehdr->e_shstrndx = SHN_UNDEF; /* * Fill in the program header entries. */ phdr = (Elf_Phdr *)((char *)dst + phoff); /* The note segement. */ phdr->p_type = PT_NOTE; phdr->p_offset = noteoff; phdr->p_vaddr = 0; phdr->p_paddr = 0; phdr->p_filesz = notesz; phdr->p_memsz = 0; phdr->p_flags = 0; phdr->p_align = 0; phdr++; /* All the writable segments from the program. */ phc.phdr = phdr; phc.offset = *off; each_writable_segment(p, cb_put_phdr, &phc); } } static void elf_putnote(void *dst, size_t *off, const char *name, int type, const void *desc, size_t descsz) { Elf_Note note; note.n_namesz = strlen(name) + 1; note.n_descsz = descsz; note.n_type = type; if (dst != NULL) bcopy(¬e, (char *)dst + *off, sizeof note); *off += sizeof note; if (dst != NULL) bcopy(name, (char *)dst + *off, note.n_namesz); *off += roundup2(note.n_namesz, sizeof(Elf_Size)); if (dst != NULL) bcopy(desc, (char *)dst + *off, note.n_descsz); *off += roundup2(note.n_descsz, sizeof(Elf_Size)); } /* * Tell kern_execve.c about it, with a little help from the linker. 
*/ static struct execsw elf_execsw = {exec_elf_imgact, "ELF"}; EXEC_SET(elf, elf_execsw); Index: head/sys/kern/kern_acct.c =================================================================== --- head/sys/kern/kern_acct.c (revision 71698) +++ head/sys/kern/kern_acct.c (revision 71699) @@ -1,352 +1,334 @@ /*- * Copyright (c) 1994 Christopher G. Demetriou * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_acct.c 8.1 (Berkeley) 6/14/93 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The routines implemented in this file are described in: * Leffler, et al.: The Design and Implementation of the 4.3BSD * UNIX Operating System (Addison Welley, 1989) * on pages 62-63. * * Arguably, to simplify accounting operations, this mechanism should * be replaced by one in which an accounting log file (similar to /dev/klog) * is read by a user process, etc. However, that has its own problems. */ /* * Internal accounting functions. * The former's operation is described in Leffler, et al., and the latter * was provided by UCB with the 4.4BSD-Lite release */ static comp_t encode_comp_t __P((u_long, u_long)); static void acctwatch __P((void *)); /* * Accounting callout used for periodic scheduling of acctwatch. */ static struct callout acctwatch_callout; /* * Accounting vnode pointer, and saved vnode pointer. 
*/ static struct vnode *acctp; static struct vnode *savacctp; /* * Values associated with enabling and disabling accounting */ static int acctsuspend = 2; /* stop accounting when < 2% free space left */ SYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW, &acctsuspend, 0, "percentage of free disk space below which accounting stops"); static int acctresume = 4; /* resume when free space risen to > 4% */ SYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW, &acctresume, 0, "percentage of free disk space above which accounting resumes"); static int acctchkfreq = 15; /* frequency (in seconds) to check space */ SYSCTL_INT(_kern, OID_AUTO, acct_chkfreq, CTLFLAG_RW, &acctchkfreq, 0, "frequency for checking the free space"); /* * Accounting system call. Written based on the specification and * previous implementation done by Mark Tinguely. */ int acct(a1, uap) struct proc *a1; struct acct_args /* { syscallarg(char *) path; } */ *uap; { struct proc *p = curproc; /* XXX */ - struct ucred *uc; struct nameidata nd; int error, flags; /* Make sure that the caller is root. */ error = suser(p); if (error) return (error); /* * If accounting is to be started to a file, open that file for * writing and make sure it's a 'normal'. */ if (SCARG(uap, path) != NULL) { NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); flags = FWRITE; error = vn_open(&nd, &flags, 0); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); VOP_UNLOCK(nd.ni_vp, 0, p); if (nd.ni_vp->v_type != VREG) { - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - vn_close(nd.ni_vp, FWRITE, uc, p); - crfree(uc); + vn_close(nd.ni_vp, FWRITE, p->p_ucred, p); return (EACCES); } } /* * If accounting was previously enabled, kill the old space-watcher, * close the file, and (if no new file was specified, leave). */ if (acctp != NULLVP || savacctp != NULLVP) { callout_stop(&acctwatch_callout); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); error = vn_close((acctp != NULLVP ? 
acctp : savacctp), FWRITE, - uc, p); - crfree(uc); + p->p_ucred, p); acctp = savacctp = NULLVP; } if (SCARG(uap, path) == NULL) return (error); /* * Save the new accounting file vnode, and schedule the new * free space watcher. */ acctp = nd.ni_vp; callout_init(&acctwatch_callout, 0); acctwatch(NULL); return (error); } /* * Write out process accounting information, on process exit. * Data to be written out is specified in Leffler, et al. * and are enumerated below. (They're also noted in the system * "acct.h" header file.) */ int acct_process(p) struct proc *p; { struct acct acct; - struct ucred *uc; struct rusage *r; struct timeval ut, st, tmp; - int t, error; + int t; struct vnode *vp; /* If accounting isn't enabled, don't bother */ vp = acctp; if (vp == NULLVP) return (0); /* * Get process accounting information. */ /* (1) The name of the command that ran */ bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm); /* (2) The amount of user and system time that was used */ mtx_enter(&sched_lock, MTX_SPIN); calcru(p, &ut, &st, NULL); mtx_exit(&sched_lock, MTX_SPIN); acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec); acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec); /* (3) The elapsed time the commmand ran (and its starting time) */ acct.ac_btime = p->p_stats->p_start.tv_sec; microtime(&tmp); timevalsub(&tmp, &p->p_stats->p_start); acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec); /* (4) The average amount of memory used */ r = &p->p_stats->p_ru; tmp = ut; timevaladd(&tmp, &st); t = tmp.tv_sec * hz + tmp.tv_usec / tick; if (t) acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t; else acct.ac_mem = 0; /* (5) The number of disk I/O operations done */ acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0); - PROC_LOCK(p); /* (6) The UID and GID of the process */ acct.ac_uid = p->p_cred->p_ruid; acct.ac_gid = p->p_cred->p_rgid; /* (7) The terminal from which the process was started */ if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp) 
acct.ac_tty = dev2udev(p->p_pgrp->pg_session->s_ttyp->t_dev); else acct.ac_tty = NOUDEV; /* (8) The boolean flags that tell how the process terminated, etc. */ acct.ac_flag = p->p_acflag; /* * Eliminate any file size rlimit. */ - mtx_assert(&Giant, MA_OWNED); if (p->p_limit->p_refcnt > 1 && (p->p_limit->p_lflags & PL_SHAREMOD) == 0) { p->p_limit->p_refcnt--; p->p_limit = limcopy(p->p_limit); } p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* * Write the accounting information to the file. */ - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); - error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct), - (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, uc, (int *)0, p); - crfree(uc); - return (error); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + return (vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct), + (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred, + (int *)0, p)); } /* * Encode_comp_t converts from ticks in seconds and microseconds * to ticks in 1/AHZ seconds. The encoding is described in * Leffler, et al., on page 63. */ #define MANTSIZE 13 /* 13 bit mantissa. */ #define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ static comp_t encode_comp_t(s, us) u_long s, us; { int exp, rnd; exp = 0; rnd = 0; s *= AHZ; s += us / (1000000 / AHZ); /* Maximize precision. */ while (s > MAXFRACT) { rnd = s & (1 << (EXPSIZE - 1)); /* Round up? */ s >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */ exp++; } /* If we need to round up, do it (and handle overflow correctly). */ if (rnd && (++s > MAXFRACT)) { s >>= EXPSIZE; exp++; } /* Clean it up and polish it off. */ exp <<= MANTSIZE; /* Shift the exponent into place */ exp += s; /* and add on the mantissa. */ return (exp); } /* * Periodically check the file system to see if accounting * should be turned on or off. Beware the case where the vnode * has been vgone()'d out from underneath us, e.g. 
when the file * system containing the accounting file has been forcibly unmounted. */ /* ARGSUSED */ static void acctwatch(a) void *a; { struct statfs sb; if (savacctp != NULLVP) { if (savacctp->v_type == VBAD) { (void) vn_close(savacctp, FWRITE, NOCRED, NULL); savacctp = NULLVP; return; } (void)VFS_STATFS(savacctp->v_mount, &sb, (struct proc *)0); if (sb.f_bavail > acctresume * sb.f_blocks / 100) { acctp = savacctp; savacctp = NULLVP; log(LOG_NOTICE, "Accounting resumed\n"); } } else { if (acctp == NULLVP) return; if (acctp->v_type == VBAD) { (void) vn_close(acctp, FWRITE, NOCRED, NULL); acctp = NULLVP; return; } (void)VFS_STATFS(acctp->v_mount, &sb, (struct proc *)0); if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) { savacctp = acctp; acctp = NULLVP; log(LOG_NOTICE, "Accounting suspended\n"); } } callout_reset(&acctwatch_callout, acctchkfreq * hz, acctwatch, NULL); } Index: head/sys/kern/kern_acl.c =================================================================== --- head/sys/kern/kern_acl.c (revision 71698) +++ head/sys/kern/kern_acl.c (revision 71699) @@ -1,303 +1,279 @@ /*- * Copyright (c) 1999, 2000 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Generic routines to support file system ACLs, at a syntactic level * Semantics are the responsibility of the underlying file system */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ACL, "acl", "access control list"); static int vacl_set_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); static int vacl_get_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); static int vacl_aclcheck(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); /* * These calls wrap the real vnode operations, and are called by the * syscall code once the syscall has converted the path or file * descriptor to a vnode (unlocked). The aclp pointer is assumed * still to point to userland, so this should not be consumed within * the kernel except by syscall code. Other code should directly * invoke VOP_{SET,GET}ACL. */ /* * Given a vnode, set its ACL. 
*/ static int vacl_set_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernacl; - struct ucred *uc; int error; error = copyin(aclp, &inkernacl, sizeof(struct acl)); if (error) return(error); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_SETACL(vp, type, &inkernacl, uc, p); + error = VOP_SETACL(vp, type, &inkernacl, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); - return (error); + return(error); } /* * Given a vnode, get its ACL. */ static int vacl_get_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernelacl; - struct ucred *uc; int error; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_GETACL(vp, type, &inkernelacl, uc, p); + error = VOP_GETACL(vp, type, &inkernelacl, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); if (error == 0) error = copyout(&inkernelacl, aclp, sizeof(struct acl)); return (error); } /* * Given a vnode, delete its ACL. 
*/ static int vacl_delete(struct proc *p, struct vnode *vp, acl_type_t type) { - struct ucred *uc; int error; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_SETACL(vp, ACL_TYPE_DEFAULT, 0, uc, p); + error = VOP_SETACL(vp, ACL_TYPE_DEFAULT, 0, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); return (error); } /* * Given a vnode, check whether an ACL is appropriate for it */ static int vacl_aclcheck(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernelacl; - struct ucred *uc; int error; error = copyin(aclp, &inkernelacl, sizeof(struct acl)); if (error) return(error); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_ACLCHECK(vp, type, &inkernelacl, uc, p); - crfree(uc); + error = VOP_ACLCHECK(vp, type, &inkernelacl, p->p_ucred, p); return (error); } /* * syscalls -- convert the path/fd to a vnode, and call vacl_whatever. * Don't need to lock, as the vacl_ code will get/release any locks * required. */ /* * Given a file path, get an ACL for it */ int __acl_get_file(struct proc *p, struct __acl_get_file_args *uap) { struct nameidata nd; int error; /* what flags are required here -- possible not LOCKLEAF? 
*/ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_get_acl(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file path, set an ACL for it */ int __acl_set_file(struct proc *p, struct __acl_set_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_set_acl(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file descriptor, get an ACL for it */ int __acl_get_fd(struct proc *p, struct __acl_get_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_get_acl(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } /* * Given a file descriptor, set an ACL for it */ int __acl_set_fd(struct proc *p, struct __acl_set_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_set_acl(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } /* * Given a file path, delete an ACL from it. */ int __acl_delete_file(struct proc *p, struct __acl_delete_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_delete(p, nd.ni_vp, SCARG(uap, type)); NDFREE(&nd, 0); return (error); } /* * Given a file path, delete an ACL from it. 
*/ int __acl_delete_fd(struct proc *p, struct __acl_delete_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); error = vacl_delete(p, (struct vnode *)fp->f_data, SCARG(uap, type)); return (error); } /* * Given a file path, check an ACL for it */ int __acl_aclcheck_file(struct proc *p, struct __acl_aclcheck_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_aclcheck(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file descriptor, check an ACL for it */ int __acl_aclcheck_fd(struct proc *p, struct __acl_aclcheck_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_aclcheck(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } Index: head/sys/kern/subr_acl_posix1e.c =================================================================== --- head/sys/kern/subr_acl_posix1e.c (revision 71698) +++ head/sys/kern/subr_acl_posix1e.c (revision 71699) @@ -1,303 +1,279 @@ /*- * Copyright (c) 1999, 2000 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Generic routines to support file system ACLs, at a syntactic level * Semantics are the responsibility of the underlying file system */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ACL, "acl", "access control list"); static int vacl_set_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); static int vacl_get_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); static int vacl_aclcheck(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); /* * These calls wrap the real vnode operations, and are called by the * syscall code once the syscall has converted the path or file * descriptor to a vnode (unlocked). The aclp pointer is assumed * still to point to userland, so this should not be consumed within * the kernel except by syscall code. Other code should directly * invoke VOP_{SET,GET}ACL. */ /* * Given a vnode, set its ACL. 
*/ static int vacl_set_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernacl; - struct ucred *uc; int error; error = copyin(aclp, &inkernacl, sizeof(struct acl)); if (error) return(error); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_SETACL(vp, type, &inkernacl, uc, p); + error = VOP_SETACL(vp, type, &inkernacl, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); - return (error); + return(error); } /* * Given a vnode, get its ACL. */ static int vacl_get_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernelacl; - struct ucred *uc; int error; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_GETACL(vp, type, &inkernelacl, uc, p); + error = VOP_GETACL(vp, type, &inkernelacl, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); if (error == 0) error = copyout(&inkernelacl, aclp, sizeof(struct acl)); return (error); } /* * Given a vnode, delete its ACL. 
*/ static int vacl_delete(struct proc *p, struct vnode *vp, acl_type_t type) { - struct ucred *uc; int error; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_SETACL(vp, ACL_TYPE_DEFAULT, 0, uc, p); + error = VOP_SETACL(vp, ACL_TYPE_DEFAULT, 0, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); return (error); } /* * Given a vnode, check whether an ACL is appropriate for it */ static int vacl_aclcheck(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernelacl; - struct ucred *uc; int error; error = copyin(aclp, &inkernelacl, sizeof(struct acl)); if (error) return(error); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_ACLCHECK(vp, type, &inkernelacl, uc, p); - crfree(uc); + error = VOP_ACLCHECK(vp, type, &inkernelacl, p->p_ucred, p); return (error); } /* * syscalls -- convert the path/fd to a vnode, and call vacl_whatever. * Don't need to lock, as the vacl_ code will get/release any locks * required. */ /* * Given a file path, get an ACL for it */ int __acl_get_file(struct proc *p, struct __acl_get_file_args *uap) { struct nameidata nd; int error; /* what flags are required here -- possible not LOCKLEAF? 
*/ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_get_acl(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file path, set an ACL for it */ int __acl_set_file(struct proc *p, struct __acl_set_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_set_acl(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file descriptor, get an ACL for it */ int __acl_get_fd(struct proc *p, struct __acl_get_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_get_acl(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } /* * Given a file descriptor, set an ACL for it */ int __acl_set_fd(struct proc *p, struct __acl_set_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_set_acl(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } /* * Given a file path, delete an ACL from it. */ int __acl_delete_file(struct proc *p, struct __acl_delete_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_delete(p, nd.ni_vp, SCARG(uap, type)); NDFREE(&nd, 0); return (error); } /* * Given a file path, delete an ACL from it. 
*/ int __acl_delete_fd(struct proc *p, struct __acl_delete_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); error = vacl_delete(p, (struct vnode *)fp->f_data, SCARG(uap, type)); return (error); } /* * Given a file path, check an ACL for it */ int __acl_aclcheck_file(struct proc *p, struct __acl_aclcheck_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_aclcheck(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file descriptor, check an ACL for it */ int __acl_aclcheck_fd(struct proc *p, struct __acl_aclcheck_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_aclcheck(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } Index: head/sys/kern/vfs_acl.c =================================================================== --- head/sys/kern/vfs_acl.c (revision 71698) +++ head/sys/kern/vfs_acl.c (revision 71699) @@ -1,303 +1,279 @@ /*- * Copyright (c) 1999, 2000 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Generic routines to support file system ACLs, at a syntactic level * Semantics are the responsibility of the underlying file system */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_ACL, "acl", "access control list"); static int vacl_set_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); static int vacl_get_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); static int vacl_aclcheck(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp); /* * These calls wrap the real vnode operations, and are called by the * syscall code once the syscall has converted the path or file * descriptor to a vnode (unlocked). The aclp pointer is assumed * still to point to userland, so this should not be consumed within * the kernel except by syscall code. Other code should directly * invoke VOP_{SET,GET}ACL. */ /* * Given a vnode, set its ACL. 
*/ static int vacl_set_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernacl; - struct ucred *uc; int error; error = copyin(aclp, &inkernacl, sizeof(struct acl)); if (error) return(error); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_SETACL(vp, type, &inkernacl, uc, p); + error = VOP_SETACL(vp, type, &inkernacl, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); - return (error); + return(error); } /* * Given a vnode, get its ACL. */ static int vacl_get_acl(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernelacl; - struct ucred *uc; int error; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_GETACL(vp, type, &inkernelacl, uc, p); + error = VOP_GETACL(vp, type, &inkernelacl, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); if (error == 0) error = copyout(&inkernelacl, aclp, sizeof(struct acl)); return (error); } /* * Given a vnode, delete its ACL. 
*/ static int vacl_delete(struct proc *p, struct vnode *vp, acl_type_t type) { - struct ucred *uc; int error; - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - VOP_LEASE(vp, p, uc, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - error = VOP_SETACL(vp, ACL_TYPE_DEFAULT, 0, uc, p); + error = VOP_SETACL(vp, ACL_TYPE_DEFAULT, 0, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); - crfree(uc); return (error); } /* * Given a vnode, check whether an ACL is appropriate for it */ static int vacl_aclcheck(struct proc *p, struct vnode *vp, acl_type_t type, struct acl *aclp) { struct acl inkernelacl; - struct ucred *uc; int error; error = copyin(aclp, &inkernelacl, sizeof(struct acl)); if (error) return(error); - PROC_LOCK(p); - uc = p->p_ucred; - crhold(uc); - PROC_UNLOCK(p); - error = VOP_ACLCHECK(vp, type, &inkernelacl, uc, p); - crfree(uc); + error = VOP_ACLCHECK(vp, type, &inkernelacl, p->p_ucred, p); return (error); } /* * syscalls -- convert the path/fd to a vnode, and call vacl_whatever. * Don't need to lock, as the vacl_ code will get/release any locks * required. */ /* * Given a file path, get an ACL for it */ int __acl_get_file(struct proc *p, struct __acl_get_file_args *uap) { struct nameidata nd; int error; /* what flags are required here -- possible not LOCKLEAF? 
*/ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_get_acl(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file path, set an ACL for it */ int __acl_set_file(struct proc *p, struct __acl_set_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_set_acl(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file descriptor, get an ACL for it */ int __acl_get_fd(struct proc *p, struct __acl_get_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_get_acl(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } /* * Given a file descriptor, set an ACL for it */ int __acl_set_fd(struct proc *p, struct __acl_set_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_set_acl(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); } /* * Given a file path, delete an ACL from it. */ int __acl_delete_file(struct proc *p, struct __acl_delete_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_delete(p, nd.ni_vp, SCARG(uap, type)); NDFREE(&nd, 0); return (error); } /* * Given a file path, delete an ACL from it. 
*/ int __acl_delete_fd(struct proc *p, struct __acl_delete_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); error = vacl_delete(p, (struct vnode *)fp->f_data, SCARG(uap, type)); return (error); } /* * Given a file path, check an ACL for it */ int __acl_aclcheck_file(struct proc *p, struct __acl_aclcheck_file_args *uap) { struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return(error); error = vacl_aclcheck(p, nd.ni_vp, SCARG(uap, type), SCARG(uap, aclp)); NDFREE(&nd, 0); return (error); } /* * Given a file descriptor, check an ACL for it */ int __acl_aclcheck_fd(struct proc *p, struct __acl_aclcheck_fd_args *uap) { struct file *fp; int error; error = getvnode(p->p_fd, SCARG(uap, filedes), &fp); if (error) return(error); return vacl_aclcheck(p, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); }