diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index d214f0dec361..8dc24d725b02 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1,1096 +1,1140 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 - * $Id: kern_descrip.c,v 1.24 1996/01/28 23:41:39 dyson Exp $ + * $Id: kern_descrip.c,v 1.25 1996/02/04 19:56:34 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ static d_open_t fdopen; #define NUMFDESC 64 #define CDEV_MAJOR 22 static struct cdevsw fildesc_cdevsw = { fdopen, noclose, noread, nowrite, /*22*/ noioc, nostop, nullreset, nodevtotty,/*fd(!=Fd)*/ noselect, nommap, nostrat }; static int finishdup(struct filedesc *fdp, int old, int new, int *retval); /* * Descriptor management. */ struct file *filehead; /* head of list of open files */ int nfiles; /* actual number of open files */ +extern int cmask; /* * System calls on descriptors. */ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int getdtablesize(p, uap, retval) struct proc *p; struct getdtablesize_args *uap; int *retval; { *retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); return (0); } /* * Duplicate a file descriptor to a particular value. 
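 *
 * A minimal user-level sketch of the semantics implemented below
 * (hypothetical file name; assumes <fcntl.h> and <unistd.h>):
 *
 *	int fd = open("/tmp/scratch", O_RDWR | O_CREAT, 0644);
 *	dup2(fd, 10);		(fd 10 now names the same open file,
 *				 sharing offset and flags with fd)
 *	dup2(fd, fd);		(old == new: no-op, returns fd)
 *
 * If descriptor 10 had been open, dup2() closes it first, and the
 * code below deliberately succeeds even when that implicit close
 * fails.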
*/ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int dup2(p, uap, retval) struct proc *p; struct dup2_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register u_int old = uap->from, new = uap->to; int i, error; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfilesperproc) return (EBADF); if (old == new) { *retval = new; return (0); } if (new >= fdp->fd_nfiles) { if ((error = fdalloc(p, new, &i))) return (error); if (new != i) panic("dup2: fdalloc"); } else if (fdp->fd_ofiles[new]) { if (fdp->fd_ofileflags[new] & UF_MAPPED) (void) munmapfd(p, new); /* * dup2() must succeed even if the close has an error. */ (void) closef(fdp->fd_ofiles[new], p); } return (finishdup(fdp, (int)old, (int)new, retval)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int dup(p, uap, retval) struct proc *p; struct dup_args *uap; int *retval; { register struct filedesc *fdp; u_int old; int new, error; old = uap->fd; #if 0 /* * XXX Compatibility */ if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); } #endif fdp = p->p_fd; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) return (EBADF); if ((error = fdalloc(p, 0, &new))) return (error); return (finishdup(fdp, (int)old, new, retval)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; int arg; }; #endif /* ARGSUSED */ int fcntl(p, uap, retval) struct proc *p; register struct fcntl_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register char *pop; struct vnode *vp; int i, tmp, error, flg = F_POSIX; struct flock fl; u_int newmin; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); pop = &fdp->fd_ofileflags[uap->fd]; switch (uap->cmd) { case F_DUPFD: newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfilesperproc) return (EINVAL); if ((error = fdalloc(p, newmin, &i))) return (error); return (finishdup(fdp, uap->fd, i, retval)); case F_GETFD: *retval = *pop & 1; return (0); case F_SETFD: *pop = (*pop &~ 1) | (uap->arg & 1); return (0); case F_GETFL: *retval = OFLAGS(fp->f_flag); return (0); case F_SETFL: fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); if (error) return (error); tmp = fp->f_flag & FASYNC; error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); if (!error) return (0); fp->f_flag &= ~FNONBLOCK; tmp = 0; (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); return (error); case F_GETOWN: if (fp->f_type == DTYPE_SOCKET) { *retval = ((struct socket *)fp->f_data)->so_pgid; return (0); } error = (*fp->f_ops->fo_ioctl) (fp, (int)TIOCGPGRP, (caddr_t)retval, p); *retval = -*retval; return (error); case F_SETOWN: if (fp->f_type == DTYPE_SOCKET) { ((struct socket *)fp->f_data)->so_pgid = uap->arg; return (0); } if (uap->arg <= 0) { uap->arg = -uap->arg; } else { struct proc *p1 = pfind(uap->arg); if (p1 == 0) return (ESRCH); uap->arg = p1->p_pgrp->pg_id; } return ((*fp->f_ops->fo_ioctl) (fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p)); case F_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ case F_SETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = 
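/*
 * (The user's struct flock selects the byte range via l_whence,
 * l_start and l_len, and the lock type via l_type: F_RDLCK, F_WRLCK
 * or F_UNLCK. SEEK_CUR ranges are rebased against the current file
 * offset immediately after the copyin.)
 */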
copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl)); if (error) return (error); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; switch (fl.l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg)); case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg)); case F_UNLCK: return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, F_POSIX)); default: return (EINVAL); } case F_GETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl)); if (error) return (error); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX))) return (error); return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl))); default: return (EINVAL); } /* NOTREACHED */ } /* * Common code for dup, dup2, and fcntl(F_DUPFD). */ static int finishdup(fdp, old, new, retval) register struct filedesc *fdp; register int old, new, *retval; { register struct file *fp; fp = fdp->fd_ofiles[old]; fdp->fd_ofiles[new] = fp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; fp->f_count++; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; return (0); } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int close(p, uap, retval) struct proc *p; struct close_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register int fd = uap->fd; register u_char *pf; if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); pf = (u_char *)&fdp->fd_ofileflags[fd]; if (*pf & UF_MAPPED) (void) munmapfd(p, fd); fdp->fd_ofiles[fd] = NULL; while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; *pf = 0; return (closef(fp, p)); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(p, uap, retval) struct proc *p; register struct ofstat_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct ostat oub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; #ifndef OLD_PIPE case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; #endif default: panic("ofstat"); /*NOTREACHED*/ } cvtstat(&ub, &oub); if (error == 0) error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)); return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Return status information about a file descriptor. 
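 *
 * A minimal user-level sketch of what this call provides (assumes
 * <sys/stat.h>; fd is hypothetical):
 *
 *	struct stat sb;
 *	if (fstat(fd, &sb) == 0)
 *		printf("size=%ld mode=%o\n", (long)sb.st_size,
 *		    (unsigned)sb.st_mode);
 *
 * As in ofstat() above, the dispatch is on f_type: vnodes, sockets
 * and (new-style) pipes each supply their own stat routine.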
*/ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int fstat(p, uap, retval) struct proc *p; register struct fstat_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; #ifndef OLD_PIPE case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; #endif default: panic("fstat"); /*NOTREACHED*/ } if (error == 0) error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)); return (error); } /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int fpathconf(p, uap, retval) struct proc *p; register struct fpathconf_args *uap; int *retval; { struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { #ifndef OLD_PIPE case DTYPE_PIPE: #endif case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) return (EINVAL); *retval = PIPE_BUF; return (0); case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; return (VOP_PATHCONF(vp, uap->name, retval)); default: panic("fpathconf"); } /*NOTREACHED*/ } /* * Allocate a file descriptor for the process. */ static int fdexpand; SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, ""); int fdalloc(p, want, result) struct proc *p; int want; int *result; { register struct filedesc *fdp = p->p_fd; register int i; int lim, last, nfiles; struct file **newofile; char *newofileflags; /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider * expanding the ofile array. */ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); for (;;) { last = min(fdp->fd_nfiles, lim); if ((i = want) < fdp->fd_freefile) i = fdp->fd_freefile; for (; i < last; i++) { if (fdp->fd_ofiles[i] == NULL) { fdp->fd_ofileflags[i] = 0; if (i > fdp->fd_lastfile) fdp->fd_lastfile = i; if (want <= fdp->fd_freefile) fdp->fd_freefile = i; *result = i; return (0); } } /* * No space in current array. Expand? */ if (fdp->fd_nfiles >= lim) return (EMFILE); if (fdp->fd_nfiles < NDEXTENT) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays * and zero the new portion of each array. */ bcopy(fdp->fd_ofiles, newofile, (i = sizeof(struct file *) * fdp->fd_nfiles)); bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); bcopy(fdp->fd_ofileflags, newofileflags, (i = sizeof(char) * fdp->fd_nfiles)); bzero(newofileflags + i, nfiles * sizeof(char) - i); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); fdp->fd_ofiles = newofile; fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; fdexpand++; } return (0); } /* * Check to see whether n user file descriptors * are available to the process p. 
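 *
 * (fdalloc() above grows the table geometrically: NDEXTENT slots to
 * start, then doubling on each exhaustion, e.g. 50, 100, 200, ... up
 * to the RLIMIT_NOFILE/maxfilesperproc cap, so the bcopy() of the old
 * arrays costs amortized O(1) per descriptor. A hedged sketch of the
 * same policy, with hypothetical names:
 *
 *	nfiles = (nfiles < NDEXTENT) ? NDEXTENT : 2 * nfiles;
 *
 * repeated until the scan finds a free slot below the limit.)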
*/ int fdavail(p, n) struct proc *p; register int n; { register struct filedesc *fdp = p->p_fd; register struct file **fpp; register int i, lim; lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) return (1); fpp = &fdp->fd_ofiles[fdp->fd_freefile]; for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++) if (*fpp == NULL && --n <= 0) return (1); return (0); } /* * Create a new open file structure and allocate * a file descriptor for the process that refers to it. */ int falloc(p, resultfp, resultfd) register struct proc *p; struct file **resultfp; int *resultfd; { register struct file *fp, *fq, **fpp; int error, i; if ((error = fdalloc(p, 0, &i))) return (error); if (nfiles >= maxfiles) { tablefull("file"); return (ENFILE); } /* * Allocate a new file descriptor. * If the process has file descriptor zero open, add to the list * of open files at that point, otherwise put it at the front of * the list of open files. */ nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); bzero(fp, sizeof(struct file)); if ((fq = p->p_fd->fd_ofiles[0])) fpp = &fq->f_filef; else fpp = &filehead; p->p_fd->fd_ofiles[i] = fp; if ((fq = *fpp)) fq->f_fileb = &fp->f_filef; fp->f_filef = fq; fp->f_fileb = fpp; *fpp = fp; fp->f_count = 1; fp->f_cred = p->p_ucred; crhold(fp->f_cred); if (resultfp) *resultfp = fp; if (resultfd) *resultfd = i; return (0); } /* * Free a file descriptor. */ void ffree(fp) register struct file *fp; { register struct file *fq; if ((fq = fp->f_filef)) fq->f_fileb = fp->f_fileb; *fp->f_fileb = fq; crfree(fp->f_cred); #ifdef DIAGNOSTIC fp->f_filef = NULL; fp->f_fileb = NULL; fp->f_count = 0; #endif nfiles--; FREE(fp, M_FILE); } +/* + * Build a new filedesc structure. + */ +struct filedesc * +fdinit(p) + struct proc *p; +{ + register struct filedesc0 *newfdp; + register struct filedesc *fdp = p->p_fd; + + MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0), + M_FILEDESC, M_WAITOK); + bzero(newfdp, sizeof(struct filedesc0)); + newfdp->fd_fd.fd_cdir = fdp->fd_cdir; + VREF(newfdp->fd_fd.fd_cdir); + newfdp->fd_fd.fd_rdir = fdp->fd_rdir; + if (newfdp->fd_fd.fd_rdir) + VREF(newfdp->fd_fd.fd_rdir); + + /* Create the file descriptor table. */ + newfdp->fd_fd.fd_refcnt = 1; + newfdp->fd_fd.fd_cmask = cmask; + newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; + newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; + newfdp->fd_fd.fd_nfiles = NDFILE; + + newfdp->fd_fd.fd_freefile = 0; + newfdp->fd_fd.fd_lastfile = 0; + + return (&newfdp->fd_fd); +} + +/* + * Share a filedesc structure. + */ +struct filedesc * +fdshare(p) + struct proc *p; +{ + p->p_fd->fd_refcnt++; + return (p->p_fd); +} + /* * Copy a filedesc structure. */ struct filedesc * fdcopy(p) struct proc *p; { register struct filedesc *newfdp, *fdp = p->p_fd; register struct file **fpp; register int i; MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bcopy(fdp, newfdp, sizeof(struct filedesc)); VREF(newfdp->fd_cdir); if (newfdp->fd_rdir) VREF(newfdp->fd_rdir); newfdp->fd_refcnt = 1; /* * If the number of open files fits in the internal arrays * of the open file structure, use them, otherwise allocate * additional memory for the number of descriptors currently * in use. 
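 *
 * (fdinit() and fdshare() above are the other two descriptor-table
 * policies used by rfork(): fdinit() builds a fresh table holding
 * only references to the current and root directories, while
 * fdshare() just bumps fd_refcnt so that parent and child operate on
 * a single table and every open() or close() in either process is
 * immediately visible to the other. fdcopy() here gives the
 * traditional fork() behaviour.)
 */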
*/ if (newfdp->fd_lastfile < NDFILE) { newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; newfdp->fd_ofileflags = ((struct filedesc0 *) newfdp)->fd_dfileflags; i = NDFILE; } else { /* * Compute the smallest multiple of NDEXTENT needed * for the file descriptors currently in use, * allowing the table to shrink. */ i = newfdp->fd_nfiles; while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE, M_FILEDESC, M_WAITOK); newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; } newfdp->fd_nfiles = i; bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp != NULL) (*fpp)->f_count++; return (newfdp); } /* * Release a filedesc structure. */ void fdfree(p) struct proc *p; { register struct filedesc *fdp = p->p_fd; struct file **fpp; register int i; if (--fdp->fd_refcnt > 0) return; fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp) (void) closef(*fpp, p); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); vrele(fdp->fd_cdir); if (fdp->fd_rdir) vrele(fdp->fd_rdir); FREE(fdp, M_FILEDESC); } /* * Close any files on exec? */ void fdcloseexec(p) struct proc *p; { struct filedesc *fdp = p->p_fd; struct file **fpp; char *fdfp; register int i; fpp = fdp->fd_ofiles; fdfp = fdp->fd_ofileflags; for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) { if (*fdfp & UF_MAPPED) (void) munmapfd(p, i); (void) closef(*fpp, p); *fpp = NULL; *fdfp = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } /* * Internal form of close. * Decrement reference count on file structure. * Note: p may be NULL when closing a file * that was being passed in a message. */ int closef(fp, p) register struct file *fp; register struct proc *p; { struct vnode *vp; struct flock lf; int error; if (fp == NULL) return (0); /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. */ if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX); } if (--fp->f_count > 0) return (0); if (fp->f_count < 0) panic("closef: count < 0"); if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); } if (fp->f_ops) error = (*fp->f_ops->fo_close)(fp, p); else error = 0; ffree(fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). 
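 *
 * A minimal user-level sketch (assumes <sys/file.h>; fd is
 * hypothetical):
 *
 *	flock(fd, LOCK_EX);		(blocks until granted)
 *	flock(fd, LOCK_EX | LOCK_NB);	(fails with EWOULDBLOCK instead)
 *	flock(fd, LOCK_UN);		(releases the lock)
 *
 * Unlike fcntl() F_SETLK these are F_FLOCK-style locks: the owner is
 * the open file (fp), not the process, so the lock follows the
 * descriptor across fork() and dup() and is only dropped at last
 * close (see closef() above).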
*/ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int flock(p, uap, retval) struct proc *p; register struct flock_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; struct flock lf; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EOPNOTSUPP); vp = (struct vnode *)fp->f_data; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; fp->f_flag &= ~FHASLOCK; return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK)); } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else return (EBADF); fp->f_flag |= FHASLOCK; if (uap->how & LOCK_NB) return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK)); return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT)); } /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. */ /* ARGSUSED */ static int fdopen(dev, mode, type, p) dev_t dev; int mode, type; struct proc *p; { /* * XXX Kludge: set curproc->p_dupfd to contain the value of * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ p->p_dupfd = minor(dev); return (ENODEV); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(fdp, indx, dfd, mode, error) register struct filedesc *fdp; register int indx, dfd; int mode; int error; { register struct file *wfp; struct file *fp; /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, reject. Note, check for new == old is necessary as * falloc could allocate an already closed to-be-dup'd descriptor * as the new descriptor. */ fp = fdp->fd_ofiles[indx]; if ((u_int)dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp) return (EBADF); /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor * (indx) and return. * * For ENXIO steal away the file structure from (dfd) and * store it in (indx). (dfd) is effectively closed by * this operation. * * Any other error code is just returned. */ switch (error) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) return (EACCES); fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; wfp->f_count++; if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; return (0); case ENXIO: /* * Steal away the file pointer from dfd, and stuff it into indx. */ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; fdp->fd_ofiles[dfd] = NULL; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; /* * Complete the clean up of the filedesc structure by * recomputing the various hints. 
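 *
 * (fd_lastfile is a high-water mark and fd_freefile a low-water hint
 * for fdalloc()'s free-slot scan; both may be conservative, never
 * optimistic. The ENODEV/ENXIO protocol exists for /dev/fd: e.g.
 * open("/dev/fd/0", O_RDONLY) reaches this function via fdopen()
 * above and behaves like dup(0).)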
*/ if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; else while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; return (0); default: return (error); } /* NOTREACHED */ } /* * Get file structures. */ static int sysctl_kern_file SYSCTL_HANDLER_ARGS { int error; struct file *fp; if (!req->oldptr) { /* * overestimate by 10 files */ return (SYSCTL_OUT(req, 0, sizeof(filehead) + (nfiles + 10) * sizeof(struct file))); } error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead)); if (error) return (error); /* * followed by an array of file structures */ for (fp = filehead; fp != NULL; fp = fp->f_filef) { error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file)); if (error) return (error); } return (0); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_kern_file, "S,file", ""); SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RD, &maxfilesperproc, 0, ""); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, ""); static fildesc_devsw_installed = 0; static void *devfs_token_stdin; static void *devfs_token_stdout; static void *devfs_token_stderr; static void *devfs_token_fildesc[NUMFDESC]; static void fildesc_drvinit(void *unused) { dev_t dev; int i; char name[32]; if( ! fildesc_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&fildesc_cdevsw,NULL); fildesc_devsw_installed = 1; #ifdef DEVFS for ( i = 0 ; i < NUMFDESC ; i++ ) { sprintf(name,"%d",i); devfs_token_fildesc[i] = devfs_add_devsw("fd",name, &fildesc_cdevsw,0, DV_CHR, 0, 0, 0666); } devfs_token_stdin = dev_link("/","stdin",devfs_token_fildesc[0]); devfs_token_stdout = dev_link("/","stdout",devfs_token_fildesc[1]); devfs_token_stderr = dev_link("/","stderr",devfs_token_fildesc[2]); #endif } } SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, fildesc_drvinit,NULL) diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index d73ff1a5e484..dbb56c454484 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -1,335 +1,384 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 - * $Id: kern_fork.c,v 1.15 1995/12/07 12:46:42 davidg Exp $ + * $Id: kern_fork.c,v 1.16 1996/01/03 21:42:02 wollman Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include +#include #include +#include -static int fork1(struct proc *, int, int *); +static int fork1(struct proc *p, int forktype, int rforkflags, int *retval); + +#define ISFORK 0 +#define ISVFORK 1 +#define ISRFORK 2 #ifndef _SYS_SYSPROTO_H_ struct fork_args { int dummy; }; #endif /* ARGSUSED */ int fork(p, uap, retval) struct proc *p; struct fork_args *uap; int retval[]; { - - return (fork1(p, 0, retval)); + return (fork1(p, ISFORK, 0, retval)); } /* ARGSUSED */ int vfork(p, uap, retval) struct proc *p; - struct fork_args *uap; + struct vfork_args *uap; int retval[]; { + return (fork1(p, ISVFORK, 0, retval)); +} - return (fork1(p, 1, retval)); +/* ARGSUSED */ +int +rfork(p, uap, retval) + struct proc *p; + struct rfork_args *uap; + int retval[]; +{ + return (fork1(p, ISRFORK, uap->flags, retval)); } + int nprocs = 1; /* process 0 */ static int -fork1(p1, isvfork, retval) +fork1(p1, forktype, rforkflags, retval) register struct proc *p1; - int isvfork, retval[]; + int forktype; + int rforkflags; + int retval[]; { register struct proc *p2; register uid_t uid; struct proc *newproc; struct proc **hash; int count; static int nextpid, pidchecked = 0; + int dupfd = 1, cleanfd = 0; + + if (forktype == ISRFORK) { + dupfd = 0; + if ((rforkflags & RFPROC) == 0) + return (EINVAL); + if ((rforkflags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) + return (EINVAL); + if (rforkflags & RFFDG) + dupfd = 1; + if (rforkflags & RFNOWAIT) + return (EINVAL); /* XXX unimplemented */ + if (rforkflags & RFCFDG) + cleanfd = 1; + } /* * Although process entries are dynamically created, we still keep * a global limit on the maximum number we will create. Don't allow * a nonprivileged user to use the last process; don't let root * exceed the limit. The variable nprocs is the current number of * processes, maxproc is the limit. */ uid = p1->p_cred->p_ruid; if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) { tablefull("proc"); return (EAGAIN); } /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. */ count = chgproccnt(uid, 1); if (uid != 0 && count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) { (void)chgproccnt(uid, -1); return (EAGAIN); } /* Allocate new proc. */ MALLOC(newproc, struct proc *, sizeof(struct proc), M_PROC, M_WAITOK); /* * Find an unused process ID. We remember a range of unused IDs * ready to use (from nextpid+1 through pidchecked-1). 
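 *
 * (The rfork() flag checks above enforce: RFPROC is mandatory, and
 * RFFDG/RFCFDG are mutually exclusive. A hedged user-level sketch,
 * FreeBSD-specific:
 *
 *	rfork(RFPROC | RFFDG);		(equivalent to fork(): copy fds)
 *	rfork(RFPROC | RFCFDG);		(child starts with an empty table)
 *	rfork(RFPROC | RFFDG | RFMEM);	(copy fds, share address space)
 *	rfork(RFPROC);			(share one descriptor table)
 *
 * RFNOWAIT is rejected with EINVAL for now, per the XXX above.)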
*/ nextpid++; retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. */ if (nextpid >= PID_MAX) { nextpid = 100; pidchecked = 0; } if (nextpid >= pidchecked) { int doingzomb = 0; pidchecked = PID_MAX; /* * Scan the active and zombie procs to check whether this pid * is in use. Remember the lowest pid that's greater * than nextpid, so we can avoid checking for a while. */ p2 = (struct proc *)allproc; again: for (; p2 != NULL; p2 = p2->p_next) { while (p2->p_pid == nextpid || p2->p_pgrp->pg_id == nextpid) { nextpid++; if (nextpid >= pidchecked) goto retry; } if (p2->p_pid > nextpid && pidchecked > p2->p_pid) pidchecked = p2->p_pid; if (p2->p_pgrp->pg_id > nextpid && pidchecked > p2->p_pgrp->pg_id) pidchecked = p2->p_pgrp->pg_id; } if (!doingzomb) { doingzomb = 1; p2 = zombproc; goto again; } } /* * Link onto allproc (this should probably be delayed). * Heavy use of volatile here to prevent the compiler from * rearranging code. Yes, it *is* terribly ugly, but at least * it works. */ nprocs++; p2 = newproc; #define Vp2 ((volatile struct proc *)p2) Vp2->p_stat = SIDL; /* protect against others */ Vp2->p_pid = nextpid; /* * This is really: * p2->p_next = allproc; * allproc->p_prev = &p2->p_next; * p2->p_prev = &allproc; * allproc = p2; * The assignment via allproc is legal since it is never NULL. */ *(volatile struct proc **)&Vp2->p_next = allproc; *(volatile struct proc ***)&allproc->p_prev = (volatile struct proc **)&Vp2->p_next; *(volatile struct proc ***)&Vp2->p_prev = &allproc; allproc = Vp2; #undef Vp2 p2->p_forw = p2->p_back = NULL; /* shouldn't be necessary */ /* Insert on the hash chain. */ hash = &pidhash[PIDHASH(p2->p_pid)]; p2->p_hash = *hash; *hash = p2; /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ bzero(&p2->p_startzero, (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero)); bcopy(&p1->p_startcopy, &p2->p_startcopy, (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy)); /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. * The p_stats and p_sigacts substructs are set in vm_fork. */ p2->p_flag = P_INMEM; if (p1->p_flag & P_PROFIL) startprofclock(p2); MALLOC(p2->p_cred, struct pcred *, sizeof(struct pcred), M_SUBPROC, M_WAITOK); bcopy(p1->p_cred, p2->p_cred, sizeof(*p2->p_cred)); p2->p_cred->p_refcnt = 1; crhold(p1->p_ucred); /* bump references to the text vnode (for procfs) */ p2->p_textvp = p1->p_textvp; if (p2->p_textvp) VREF(p2->p_textvp); - p2->p_fd = fdcopy(p1); + if (cleanfd) + p2->p_fd = fdinit(p1); + else if (dupfd) + p2->p_fd = fdcopy(p1); + else + p2->p_fd = fdshare(p1); + /* * If p_limit is still copy-on-write, bump refcnt, * otherwise get a copy that won't be modified. * (If PL_SHAREMOD is clear, the structure is shared * copy-on-write.) */ if (p1->p_limit->p_lflags & PL_SHAREMOD) p2->p_limit = limcopy(p1->p_limit); else { p2->p_limit = p1->p_limit; p2->p_limit->p_refcnt++; } if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; - if (isvfork) + if (forktype == ISVFORK) p2->p_flag |= P_PPWAIT; p2->p_pgrpnxt = p1->p_pgrpnxt; p1->p_pgrpnxt = p2; p2->p_pptr = p1; p2->p_osptr = p1->p_cptr; if (p1->p_cptr) p1->p_cptr->p_ysptr = p2; p1->p_cptr = p2; #ifdef KTRACE /* * Copy traceflag and tracefile if enabled. 
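 * (KTRFAC_INHERIT is set when tracing was requested with ktrace(1)'s
 * inherit option, e.g. "ktrace -i command", so the trace follows
 * every descendant the command forks.)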
* If not inherited, these were zeroed above. */ if (p1->p_traceflag&KTRFAC_INHERIT) { p2->p_traceflag = p1->p_traceflag; if ((p2->p_tracep = p1->p_tracep) != NULL) VREF(p2->p_tracep); } #endif /* * set priority of child to be that of parent */ p2->p_estcpu = p1->p_estcpu; /* * This begins the section where we must prevent the parent * from being swapped. */ p1->p_flag |= P_NOSWAP; + /* + * share as much address space as possible + */ + if (forktype == ISRFORK && (rforkflags & RFMEM)) { + (void) vm_map_inherit(&p1->p_vmspace->vm_map, + VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS - MAXSSIZ, + VM_INHERIT_SHARE); + } + /* * Set return values for child before vm_fork, * so they can be copied to child stack. * We return parent pid, and mark as child in retval[1]. * NOTE: the kernel stack may be at a different location in the child * process, and thus addresses of automatic variables (including retval) * may be invalid after vm_fork returns in the child process. */ retval[0] = p1->p_pid; retval[1] = 1; - if (vm_fork(p1, p2, isvfork)) { + if (vm_fork(p1, p2)) { /* * Child process. Set start time and get to work. */ microtime(&runtime); p2->p_stats->p_start = runtime; p2->p_acflag = AFORK; return (0); } /* * Make child runnable and add to run queue. */ (void) splhigh(); p2->p_stat = SRUN; setrunqueue(p2); (void) spl0(); /* * Now can be swapped. */ p1->p_flag &= ~P_NOSWAP; /* * Preserve synchronization semantics of vfork. If waiting for * child to exec or exit, set P_PPWAIT on child, and sleep on our * proc (in case of exit). */ - if (isvfork) + if (forktype == ISVFORK) while (p2->p_flag & P_PPWAIT) tsleep(p1, PWAIT, "ppwait", 0); /* * Return child pid to parent process, * marking us as parent via retval[1]. */ retval[0] = p2->p_pid; retval[1] = 0; return (0); } diff --git a/sys/kern/sysv_ipc.c b/sys/kern/sysv_ipc.c index bd7c0c8bb5a3..e62068e83af1 100644 --- a/sys/kern/sysv_ipc.c +++ b/sys/kern/sysv_ipc.c @@ -1,298 +1,297 @@ -/* $Id: sysv_ipc.c,v 1.2 1996/01/05 16:37:52 wollman Exp $ */ +/* $Id: sysv_ipc.c,v 1.3 1996/01/08 04:30:48 peter Exp $ */ /* $NetBSD: sysv_ipc.c,v 1.7 1994/06/29 06:33:11 cgd Exp $ */ /* * Copyright (c) 1994 Herb Peyerl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Herb Peyerl. * 4. The name of Herb Peyerl may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #if defined(SYSVSEM) || defined(SYSVSHM) || defined(SYSVMSG) /* * Check for ipc permission */ int ipcperm(cred, perm, mode) struct ucred *cred; struct ipc_perm *perm; int mode; { if (cred->cr_uid == 0) return (0); /* Check for user match. */ if (cred->cr_uid != perm->cuid && cred->cr_uid != perm->uid) { if (mode & IPC_M) return (EPERM); /* Check for group match. */ mode >>= 3; if (!groupmember(perm->gid, cred) && !groupmember(perm->cgid, cred)) /* Check for `other' match. */ mode >>= 3; } if (mode & IPC_M) return (0); return ((mode & perm->mode) == mode ? 0 : EACCES); } #endif /* defined(SYSVSEM) || defined(SYSVSHM) || defined(SYSVMSG) */ #if !defined(SYSVSEM) || !defined(SYSVSHM) || !defined(SYSVMSG) static void sysv_nosys __P((struct proc *p, char *s)); static void sysv_nosys(p, s) struct proc *p; char *s; { log(LOG_ERR, "cmd %s pid %d tried to use non-present %s\n", p->p_comm, p->p_pid, s); } #if !defined(SYSVSEM) /* * SYSVSEM stubs */ int semsys(p, uap, retval) struct proc *p; struct semsys_args *uap; int *retval; { sysv_nosys(p, "SYSVSEM"); return nosys(p, (struct nosys_args *)uap, retval); }; int semconfig(p, uap, retval) struct proc *p; struct semconfig_args *uap; int *retval; { sysv_nosys(p, "SYSVSEM"); return nosys(p, (struct nosys_args *)uap, retval); }; int __semctl(p, uap, retval) struct proc *p; register struct __semctl_args *uap; int *retval; { sysv_nosys(p, "SYSVSEM"); return nosys(p, (struct nosys_args *)uap, retval); }; int semget(p, uap, retval) struct proc *p; register struct semget_args *uap; int *retval; { sysv_nosys(p, "SYSVSEM"); return nosys(p, (struct nosys_args *)uap, retval); }; int semop(p, uap, retval) struct proc *p; register struct semop_args *uap; int *retval; { sysv_nosys(p, "SYSVSEM"); return nosys(p, (struct nosys_args *)uap, retval); }; /* called from kern_exit.c */ void semexit(p) struct proc *p; { return; } #endif /* !defined(SYSVSEM) */ #if !defined(SYSVMSG) /* * SYSVMSG stubs */ int msgsys(p, uap, retval) struct proc *p; /* XXX actually varargs. 
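 *
 * (ipcperm() above applies the mode bits in POSIX fashion: root
 * always passes; an owner/creator match uses the owner triad; on a
 * uid mismatch the request is shifted right 3 bits for a group match
 * and 3 more for "other". E.g. against perm->mode 0640, a group
 * member's IPC_R request (0400) is shifted to 0040 and granted, while
 * an unrelated process ends at 0004 and gets EACCES. IPC_M requests,
 * which modify the object, are owner- or root-only. The stubs below
 * simply log one diagnostic through sysv_nosys() and then fail the
 * call via nosys().)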
*/ struct msgsys_args *uap; int *retval; { sysv_nosys(p, "SYSVMSG"); return nosys(p, (struct nosys_args *)uap, retval); }; int msgctl(p, uap, retval) struct proc *p; register struct msgctl_args *uap; int *retval; { sysv_nosys(p, "SYSVMSG"); return nosys(p, (struct nosys_args *)uap, retval); }; int msgget(p, uap, retval) struct proc *p; register struct msgget_args *uap; int *retval; { sysv_nosys(p, "SYSVMSG"); return nosys(p, (struct nosys_args *)uap, retval); }; int msgsnd(p, uap, retval) struct proc *p; register struct msgsnd_args *uap; int *retval; { sysv_nosys(p, "SYSVMSG"); return nosys(p, (struct nosys_args *)uap, retval); }; int msgrcv(p, uap, retval) struct proc *p; register struct msgrcv_args *uap; int *retval; { sysv_nosys(p, "SYSVMSG"); return nosys(p, (struct nosys_args *)uap, retval); }; #endif /* !defined(SYSVMSG) */ #if !defined(SYSVSHM) /* * SYSVSHM stubs */ int shmdt(p, uap, retval) struct proc *p; struct shmdt_args *uap; int *retval; { sysv_nosys(p, "SYSVSHM"); return nosys(p, (struct nosys_args *)uap, retval); }; int shmat(p, uap, retval) struct proc *p; struct shmat_args *uap; int *retval; { sysv_nosys(p, "SYSVSHM"); return nosys(p, (struct nosys_args *)uap, retval); }; int shmctl(p, uap, retval) struct proc *p; struct shmctl_args *uap; int *retval; { sysv_nosys(p, "SYSVSHM"); return nosys(p, (struct nosys_args *)uap, retval); }; int shmget(p, uap, retval) struct proc *p; struct shmget_args *uap; int *retval; { sysv_nosys(p, "SYSVSHM"); return nosys(p, (struct nosys_args *)uap, retval); }; int shmsys(p, uap, retval) struct proc *p; /* XXX actually varargs. */ struct shmsys_args *uap; int *retval; { sysv_nosys(p, "SYSVSHM"); return nosys(p, (struct nosys_args *)uap, retval); }; /* called from kern_fork.c */ void -shmfork(p1, p2, isvfork) +shmfork(p1, p2) struct proc *p1, *p2; - int isvfork; { return; } /* called from kern_exit.c */ void shmexit(p) struct proc *p; { return; } #endif /* !defined(SYSVSHM) */ #endif /* !defined(SYSVSEM) || !defined(SYSVSHM) || !defined(SYSVMSG) */ diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 72c8a2210307..a68f8a64f777 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,617 +1,616 @@ -/* $Id: sysv_shm.c,v 1.16 1995/12/26 16:03:32 joerg Exp $ */ +/* $Id: sysv_shm.c,v 1.17 1996/01/05 16:38:03 wollman Exp $ */ /* $NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $ */ /* * Copyright (c) 1994 Adam Glass and Charles Hannum. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Adam Glass and Charles * Hannum. * 4. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef _SYS_SYSPROTO_H_ struct shmat_args; extern int shmat __P((struct proc *p, struct shmat_args *uap, int *retval)); struct shmctl_args; extern int shmctl __P((struct proc *p, struct shmctl_args *uap, int *retval)); struct shmdt_args; extern int shmdt __P((struct proc *p, struct shmdt_args *uap, int *retval)); struct shmget_args; extern int shmget __P((struct proc *p, struct shmget_args *uap, int *retval)); #endif static void shminit __P((void *)); SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL) struct oshmctl_args; static int oshmctl __P((struct proc *p, struct oshmctl_args *uap, int *retval)); static int shmget_allocate_segment __P((struct proc *p, struct shmget_args *uap, int mode, int *retval)); static int shmget_existing __P((struct proc *p, struct shmget_args *uap, int mode, int segnum, int *retval)); /* XXX casting to (sy_call_t *) is bogus, as usual. */ sy_call_t *shmcalls[] = { (sy_call_t *)shmat, (sy_call_t *)oshmctl, (sy_call_t *)shmdt, (sy_call_t *)shmget, (sy_call_t *)shmctl }; #define SHMSEG_FREE 0x0200 #define SHMSEG_REMOVED 0x0400 #define SHMSEG_ALLOCATED 0x0800 #define SHMSEG_WANTED 0x1000 static vm_map_t sysvshm_map; static int shm_last_free, shm_nused, shm_committed; struct shmid_ds *shmsegs; struct shm_handle { vm_offset_t kva; }; struct shmmap_state { vm_offset_t va; int shmid; }; static void shm_deallocate_segment __P((struct shmid_ds *)); static int shm_find_segment_by_key __P((key_t)); static struct shmid_ds *shm_find_segment_by_shmid __P((int)); static int shm_delete_mapping __P((struct proc *, struct shmmap_state *)); static int shm_find_segment_by_key(key) key_t key; { int i; for (i = 0; i < shminfo.shmmni; i++) if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) && shmsegs[i].shm_perm.key == key) return i; return -1; } static struct shmid_ds * shm_find_segment_by_shmid(shmid) int shmid; { int segnum; struct shmid_ds *shmseg; segnum = IPCID_TO_IX(shmid); if (segnum < 0 || segnum >= shminfo.shmmni) return NULL; shmseg = &shmsegs[segnum]; if ((shmseg->shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED)) != SHMSEG_ALLOCATED || shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) return NULL; return shmseg; } static void shm_deallocate_segment(shmseg) struct shmid_ds *shmseg; { struct shm_handle *shm_handle; size_t size; shm_handle = shmseg->shm_internal; size = (shmseg->shm_segsz + CLOFSET) & ~CLOFSET; (void) vm_map_remove(sysvshm_map, shm_handle->kva, shm_handle->kva + size); free((caddr_t)shm_handle, M_SHM); shmseg->shm_internal = NULL; shm_committed -= btoc(size); shm_nused--; shmseg->shm_perm.mode = SHMSEG_FREE; } static int shm_delete_mapping(p, shmmap_s) struct proc *p; struct shmmap_state *shmmap_s; { struct shmid_ds *shmseg; int segnum, result; size_t size; segnum = IPCID_TO_IX(shmmap_s->shmid); shmseg = &shmsegs[segnum]; size = (shmseg->shm_segsz + CLOFSET) & 
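/*
 * (the (x + CLOFSET) & ~CLOFSET idiom rounds x up to the next
 * cluster boundary, CLOFSET being CLBYTES - 1; every segment size
 * in this file is rounded the same way)
 */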
~CLOFSET; result = vm_map_remove(&p->p_vmspace->vm_map, shmmap_s->va, shmmap_s->va + size); if (result != KERN_SUCCESS) return EINVAL; shmmap_s->shmid = -1; shmseg->shm_dtime = time.tv_sec; if ((--shmseg->shm_nattch <= 0) && (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { shm_deallocate_segment(shmseg); shm_last_free = segnum; } return 0; } #ifndef _SYS_SYSPROTO_H_ struct shmdt_args { void *shmaddr; }; #endif int shmdt(p, uap, retval) struct proc *p; struct shmdt_args *uap; int *retval; { struct shmmap_state *shmmap_s; int i; shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; if (shmmap_s == NULL) return EINVAL; for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1 && shmmap_s->va == (vm_offset_t)uap->shmaddr) break; if (i == shminfo.shmseg) return EINVAL; return shm_delete_mapping(p, shmmap_s); } #ifndef _SYS_SYSPROTO_H_ struct shmat_args { int shmid; void *shmaddr; int shmflg; }; #endif int shmat(p, uap, retval) struct proc *p; struct shmat_args *uap; int *retval; { int error, i, flags; struct ucred *cred = p->p_ucred; struct shmid_ds *shmseg; struct shmmap_state *shmmap_s = NULL; vm_offset_t attach_va; vm_prot_t prot; vm_size_t size; shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; if (shmmap_s == NULL) { size = shminfo.shmseg * sizeof(struct shmmap_state); shmmap_s = malloc(size, M_SHM, M_WAITOK); for (i = 0; i < shminfo.shmseg; i++) shmmap_s[i].shmid = -1; p->p_vmspace->vm_shm = (caddr_t)shmmap_s; } shmseg = shm_find_segment_by_shmid(uap->shmid); if (shmseg == NULL) return EINVAL; error = ipcperm(cred, &shmseg->shm_perm, (uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); if (error) return error; for (i = 0; i < shminfo.shmseg; i++) { if (shmmap_s->shmid == -1) break; shmmap_s++; } if (i >= shminfo.shmseg) return EMFILE; size = (shmseg->shm_segsz + CLOFSET) & ~CLOFSET; prot = VM_PROT_READ; if ((uap->shmflg & SHM_RDONLY) == 0) prot |= VM_PROT_WRITE; flags = MAP_ANON | MAP_SHARED; if (uap->shmaddr) { flags |= MAP_FIXED; if (uap->shmflg & SHM_RND) attach_va = (vm_offset_t)uap->shmaddr & ~(SHMLBA-1); else if (((vm_offset_t)uap->shmaddr & (SHMLBA-1)) == 0) attach_va = (vm_offset_t)uap->shmaddr; else return EINVAL; } else { /* This is just a hint to vm_mmap() about where to put it. */ attach_va = round_page(p->p_vmspace->vm_taddr + MAXTSIZ + MAXDSIZ); } error = vm_mmap(&p->p_vmspace->vm_map, &attach_va, size, prot, VM_PROT_DEFAULT, flags, (caddr_t) uap->shmid, 0); if (error) return error; shmmap_s->va = attach_va; shmmap_s->shmid = uap->shmid; shmseg->shm_lpid = p->p_pid; shmseg->shm_atime = time.tv_sec; shmseg->shm_nattch++; *retval = attach_va; return 0; } struct oshmid_ds { struct ipc_perm shm_perm; /* operation perms */ int shm_segsz; /* size of segment (bytes) */ ushort shm_cpid; /* pid, creator */ ushort shm_lpid; /* pid, last operation */ short shm_nattch; /* no. 
of current attaches */ time_t shm_atime; /* last attach time */ time_t shm_dtime; /* last detach time */ time_t shm_ctime; /* last change time */ void *shm_handle; /* internal handle for shm segment */ }; struct oshmctl_args { int shmid; int cmd; struct oshmid_ds *ubuf; }; static int oshmctl(p, uap, retval) struct proc *p; struct oshmctl_args *uap; int *retval; { #ifdef COMPAT_43 int error; struct ucred *cred = p->p_ucred; struct shmid_ds *shmseg; struct oshmid_ds outbuf; shmseg = shm_find_segment_by_shmid(uap->shmid); if (shmseg == NULL) return EINVAL; switch (uap->cmd) { case IPC_STAT: error = ipcperm(cred, &shmseg->shm_perm, IPC_R); if (error) return error; outbuf.shm_perm = shmseg->shm_perm; outbuf.shm_segsz = shmseg->shm_segsz; outbuf.shm_cpid = shmseg->shm_cpid; outbuf.shm_lpid = shmseg->shm_lpid; outbuf.shm_nattch = shmseg->shm_nattch; outbuf.shm_atime = shmseg->shm_atime; outbuf.shm_dtime = shmseg->shm_dtime; outbuf.shm_ctime = shmseg->shm_ctime; outbuf.shm_handle = shmseg->shm_internal; error = copyout((caddr_t)&outbuf, uap->ubuf, sizeof(outbuf)); if (error) return error; break; default: /* XXX casting to (sy_call_t *) is bogus, as usual. */ return ((sy_call_t *)shmctl)(p, uap, retval); } return 0; #else return EINVAL; #endif } #ifndef _SYS_SYSPROTO_H_ struct shmctl_args { int shmid; int cmd; struct shmid_ds *buf; }; #endif int shmctl(p, uap, retval) struct proc *p; struct shmctl_args *uap; int *retval; { int error; struct ucred *cred = p->p_ucred; struct shmid_ds inbuf; struct shmid_ds *shmseg; shmseg = shm_find_segment_by_shmid(uap->shmid); if (shmseg == NULL) return EINVAL; switch (uap->cmd) { case IPC_STAT: error = ipcperm(cred, &shmseg->shm_perm, IPC_R); if (error) return error; error = copyout((caddr_t)shmseg, uap->buf, sizeof(inbuf)); if (error) return error; break; case IPC_SET: error = ipcperm(cred, &shmseg->shm_perm, IPC_M); if (error) return error; error = copyin(uap->buf, (caddr_t)&inbuf, sizeof(inbuf)); if (error) return error; shmseg->shm_perm.uid = inbuf.shm_perm.uid; shmseg->shm_perm.gid = inbuf.shm_perm.gid; shmseg->shm_perm.mode = (shmseg->shm_perm.mode & ~ACCESSPERMS) | (inbuf.shm_perm.mode & ACCESSPERMS); shmseg->shm_ctime = time.tv_sec; break; case IPC_RMID: error = ipcperm(cred, &shmseg->shm_perm, IPC_M); if (error) return error; shmseg->shm_perm.key = IPC_PRIVATE; shmseg->shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->shm_nattch <= 0) { shm_deallocate_segment(shmseg); shm_last_free = IPCID_TO_IX(uap->shmid); } break; #if 0 case SHM_LOCK: case SHM_UNLOCK: #endif default: return EINVAL; } return 0; } #ifndef _SYS_SYSPROTO_H_ struct shmget_args { key_t key; size_t size; int shmflg; }; #endif static int shmget_existing(p, uap, mode, segnum, retval) struct proc *p; struct shmget_args *uap; int mode; int segnum; int *retval; { struct shmid_ds *shmseg; struct ucred *cred = p->p_ucred; int error; shmseg = &shmsegs[segnum]; if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { /* * This segment is in the process of being allocated. Wait * until it's done, and look the key up again (in case the * allocation failed or it was freed). 
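 *
 * (The handshake: shmget_allocate_segment() below marks a nascent
 * segment SHMSEG_ALLOCATED | SHMSEG_REMOVED while it may sleep in
 * malloc() or vm_mmap(); a second shmget() on the same key lands
 * here, sets SHMSEG_WANTED and tsleep()s on the segment address.
 * The creator wakeup()s it on success or failure, and the EAGAIN
 * below makes shmget() redo the key lookup from scratch.)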
*/ shmseg->shm_perm.mode |= SHMSEG_WANTED; error = tsleep((caddr_t)shmseg, PLOCK | PCATCH, "shmget", 0); if (error) return error; return EAGAIN; } error = ipcperm(cred, &shmseg->shm_perm, mode); if (error) return error; if (uap->size && uap->size > shmseg->shm_segsz) return EINVAL; if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) return EEXIST; *retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); return 0; } static int shmget_allocate_segment(p, uap, mode, retval) struct proc *p; struct shmget_args *uap; int mode; int *retval; { int i, segnum, result, shmid, size; struct ucred *cred = p->p_ucred; struct shmid_ds *shmseg; struct shm_handle *shm_handle; if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax) return EINVAL; if (shm_nused >= shminfo.shmmni) /* any shmids left? */ return ENOSPC; size = (uap->size + CLOFSET) & ~CLOFSET; if (shm_committed + btoc(size) > shminfo.shmall) return ENOMEM; if (shm_last_free < 0) { for (i = 0; i < shminfo.shmmni; i++) if (shmsegs[i].shm_perm.mode & SHMSEG_FREE) break; if (i == shminfo.shmmni) panic("shmseg free count inconsistent"); segnum = i; } else { segnum = shm_last_free; shm_last_free = -1; } shmseg = &shmsegs[segnum]; /* * In case we sleep in malloc(), mark the segment present but deleted * so that no one else tries to create the same key. */ shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED; shmseg->shm_perm.key = uap->key; shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff; shm_handle = (struct shm_handle *) malloc(sizeof(struct shm_handle), M_SHM, M_WAITOK); shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); result = vm_mmap(sysvshm_map, &shm_handle->kva, size, VM_PROT_ALL, VM_PROT_DEFAULT, MAP_ANON, (caddr_t) shmid, 0); if (result != KERN_SUCCESS) { shmseg->shm_perm.mode = SHMSEG_FREE; shm_last_free = segnum; free((caddr_t)shm_handle, M_SHM); /* Just in case. */ wakeup((caddr_t)shmseg); return ENOMEM; } shmseg->shm_internal = shm_handle; shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid; shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid; shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) | (mode & ACCESSPERMS) | SHMSEG_ALLOCATED; shmseg->shm_segsz = uap->size; shmseg->shm_cpid = p->p_pid; shmseg->shm_lpid = shmseg->shm_nattch = 0; shmseg->shm_atime = shmseg->shm_dtime = 0; shmseg->shm_ctime = time.tv_sec; shm_committed += btoc(size); shm_nused++; if (shmseg->shm_perm.mode & SHMSEG_WANTED) { /* * Somebody else wanted this key while we were asleep. Wake * them up now. */ shmseg->shm_perm.mode &= ~SHMSEG_WANTED; wakeup((caddr_t)shmseg); } *retval = shmid; return 0; } int shmget(p, uap, retval) struct proc *p; struct shmget_args *uap; int *retval; { int segnum, mode, error; mode = uap->shmflg & ACCESSPERMS; if (uap->key != IPC_PRIVATE) { again: segnum = shm_find_segment_by_key(uap->key); if (segnum >= 0) { error = shmget_existing(p, uap, mode, segnum, retval); if (error == EAGAIN) goto again; return error; } if ((uap->shmflg & IPC_CREAT) == 0) return ENOENT; } return shmget_allocate_segment(p, uap, mode, retval); } int shmsys(p, uap, retval) struct proc *p; /* XXX actually varargs. 
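 *
 * A minimal user-level sketch of the calls multiplexed here (assumes
 * <sys/ipc.h> and <sys/shm.h>; key and size are hypothetical):
 *
 *	int id = shmget(0x1234, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);	(kernel chooses the address)
 *	p[0] = 'x';			(visible to other attachers)
 *	shmdt(p);
 *	shmctl(id, IPC_RMID, NULL);	(destroyed once nattch drops
 *					 to zero)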
*/ struct shmsys_args /* { u_int which; int a2; int a3; int a4; } */ *uap; int *retval; { if (uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0])) return EINVAL; return ((*shmcalls[uap->which])(p, &uap->a2, retval)); } void -shmfork(p1, p2, isvfork) +shmfork(p1, p2) struct proc *p1, *p2; - int isvfork; { struct shmmap_state *shmmap_s; size_t size; int i; size = shminfo.shmseg * sizeof(struct shmmap_state); shmmap_s = malloc(size, M_SHM, M_WAITOK); bcopy((caddr_t)p1->p_vmspace->vm_shm, (caddr_t)shmmap_s, size); p2->p_vmspace->vm_shm = (caddr_t)shmmap_s; for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1) shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++; } void shmexit(p) struct proc *p; { struct shmmap_state *shmmap_s; int i; shmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm; for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) if (shmmap_s->shmid != -1) shm_delete_mapping(p, shmmap_s); free((caddr_t)p->p_vmspace->vm_shm, M_SHM); p->p_vmspace->vm_shm = NULL; } void shminit(dummy) void *dummy; { int i; vm_offset_t garbage1, garbage2; /* actually this *should* be pageable. SHM_{LOCK,UNLOCK} */ sysvshm_map = kmem_suballoc(kernel_map, &garbage1, &garbage2, shminfo.shmall * NBPG, TRUE); for (i = 0; i < shminfo.shmmni; i++) { shmsegs[i].shm_perm.mode = SHMSEG_FREE; shmsegs[i].shm_perm.seq = 0; } shm_last_free = 0; shm_nused = 0; shm_committed = 0; } diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 7f71cd8d2ecd..7bd4578e1f7f 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -1,110 +1,112 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)filedesc.h 8.1 (Berkeley) 6/2/93 - * $Id: filedesc.h,v 1.6 1995/05/30 08:14:20 rgrimes Exp $ + * $Id: filedesc.h,v 1.7 1995/11/04 10:35:17 bde Exp $ */ #ifndef _SYS_FILEDESC_H_ #define _SYS_FILEDESC_H_ /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. * * A process is initially started out with NDFILE descriptors stored within * this structure, selected to be enough for typical applications based on * the historical limit of 20 open files (and the usage of descriptors by * shells). If these descriptors are exhausted, a larger descriptor table * may be allocated, up to a process' resource limit; the internal arrays * are then unused. The initial expansion is set to NDEXTENT; each time * it runs out, it is doubled until the resource limit is reached. NDEXTENT * should be selected to be the biggest multiple of OFILESIZE (see below) * that will fit in a power-of-two sized piece of memory. */ #define NDFILE 20 #define NDEXTENT 50 /* 250 bytes in 256-byte alloc. */ struct filedesc { struct file **fd_ofiles; /* file structures for open files */ char *fd_ofileflags; /* per-process open file flags */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ int fd_nfiles; /* number of open files allocated */ u_short fd_lastfile; /* high-water mark of fd_ofiles */ u_short fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ u_short fd_refcnt; /* reference count */ }; /* * Basic allocation of descriptors: * one of the above, plus arrays for NDFILE descriptors. */ struct filedesc0 { struct filedesc fd_fd; /* * These arrays are used when the number of open files is * <= NDFILE, and are then pointed to by the pointers above. */ struct file *fd_dfiles[NDFILE]; char fd_dfileflags[NDFILE]; }; /* * Per-process open flags. */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ #define UF_MAPPED 0x02 /* mapped from device */ /* * Storage required per open file descriptor. */ #define OFILESIZE (sizeof(struct file *) + sizeof(char)) #ifdef KERNEL /* * Kernel global variables and routines. */ int dupfdopen __P((struct filedesc *, int, int, int, int)); int fdalloc __P((struct proc *p, int want, int *result)); int fdavail __P((struct proc *p, int n)); int falloc __P((struct proc *p, struct file **resultfp, int *resultfd)); void ffree __P((struct file *)); +struct filedesc *fdinit __P((struct proc *p)); +struct filedesc *fdshare __P((struct proc *p)); struct filedesc *fdcopy __P((struct proc *p)); void fdfree __P((struct proc *p)); int closef __P((struct file *fp,struct proc *p)); void fdcloseexec __P((struct proc *p)); int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); #endif #endif diff --git a/sys/sys/param.h b/sys/sys/param.h index 88e0e1610527..6766c2fde1c7 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,229 +1,245 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.h 8.2 (Berkeley) 1/21/94 - * $Id: param.h,v 1.10 1995/12/05 21:03:13 bde Exp $ + * $Id: param.h,v 1.11 1996/01/30 23:01:04 mpp Exp $ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #define BSD 199306 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 #ifndef NULL #define NULL 0 #endif #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see ) * MAXLOGNAME should be >= UT_NAMESIZE (see ) */ #include #define MAXCOMLEN 16 /* max command name remembered */ #define MAXINTERP 32 /* max interpreter file name length */ #define MAXLOGNAME 12 /* max login name length */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS NGROUPS_MAX /* max number groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ /* More types and definitions used throughout the kernel. */ #ifdef KERNEL #include #include #include #include #include #include #include #define FALSE 0 #define TRUE 1 #endif /* Signals. */ #include /* Machine type dependent parameters. */ #include #include /* * Priorities. Note that with 32 run queues, differences less than 4 are * insignificant. */ #define PSWP 0 #define PVM 4 #define PINOD 8 #define PRIBIO 16 #define PVFS 20 #define PZERO 22 /* No longer magic, shouldn't be here. XXX */ #define PSOCK 24 #define PWAIT 32 #define PLOCK 36 #define PPAUSE 40 #define PUSER 50 #define MAXPRI 127 /* Priorities range from 0 through MAXPRI. 
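 */

/*
 * A compilable sketch of how the sleep priorities above combine with
 * PCATCH (defined just below): shmget_existing() sleeps with
 * tsleep(..., PLOCK | PCATCH, "shmget", 0), so the low bits carry the
 * table priority and the PCATCH bit asks tsleep() to check for
 * signals.  The SK_* macros restate the kernel values so the sketch
 * stands alone; it is illustrative, not kernel code.
 */
#include <assert.h>

#define SK_PRIMASK	0x0ff
#define SK_PCATCH	0x100
#define SK_PLOCK	36

int
main(void)
{
	int pri = SK_PLOCK | SK_PCATCH;		/* as in shmget's tsleep() */

	assert((pri & SK_PRIMASK) == SK_PLOCK);	/* sleep priority */
	assert((pri & SK_PCATCH) != 0);		/* abort on signals */
	return (0);
}

/*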
*/ #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define NZERO 0 /* default "nice" */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ /* * Clustering of hardware pages on machines with ridiculously small * page sizes is done here. The paging subsystem deals with units of * CLSIZE pte's describing NBPG (from machine/machparam.h) pages each. */ #define CLBYTES (CLSIZE*NBPG) #define CLOFSET (CLSIZE*NBPG-1) /* for clusters, like PGOFSET */ #define claligned(x) ((((int)(x))&CLOFSET)==0) #define CLOFF CLOFSET #define CLSHIFT (PGSHIFT+CLSIZELOG2) #if CLSIZE==1 #define clbase(i) (i) #define clrnd(i) (i) #else /* Give the base virtual address (first of CLSIZE). */ #define clbase(i) ((i) &~ (CLSIZE-1)) /* Round a number of clicks up to a whole cluster. */ #define clrnd(i) (((i) + (CLSIZE-1)) &~ (CLSIZE-1)) #endif #define CBLOCK 128 /* Clist block size, must be a power of 2. */ #define CBQSIZE (CBLOCK/NBBY) /* Quote bytes/cblock - can do better. */ /* Data chars/clist. */ #define CBSIZE (CBLOCK - sizeof(struct cblock *) - CBQSIZE) #define CROUND (CBLOCK - 1) /* Clist rounding. */ /* * File system parameters and macros. * * The file system is made out of blocks of at most MAXBSIZE units, with * smaller units (fragments) only in the last direct block. MAXBSIZE * primarily determines the size of buffers in the buffer pool. It may be * made larger without any effect on existing file systems; however making * it smaller may make some file systems unmountable. Also, MAXBSIZE * must be less than MAXPHYS!!! */ #define MAXBSIZE 16384 #define MAXFRAG 8 /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) ((a)[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) ((a)[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define rounddown(x, y) (((x)/(y))*(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is a power of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #ifndef KERNEL #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #endif /* * Constants for setting the parameters of the kernel memory allocator. * * 2 ** MINBUCKET is the smallest unit of memory that will be * allocated. It must be at least large enough to hold a pointer. * * Units of memory less than or equal to MAXALLOCSAVE will permanently * allocate physical memory; requests for these size pieces of * memory are quite fast. Allocations greater than MAXALLOCSAVE must * always allocate and free physical memory; requests for these * size allocations should be done infrequently as they will be slow.
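 */

/*
 * A quick self-test of the counting, rounding and bitmap macros defined
 * above, restated with sk_ prefixes so it compiles on its own.  Note
 * the caveat in the comments: roundup() works for any y, while
 * roundup2() is only valid when y is a power of two (which powerof2()
 * can verify).
 */
#include <assert.h>
#include <string.h>

#define SK_NBBY			8
#define sk_howmany(x, y)	(((x)+((y)-1))/(y))
#define sk_roundup(x, y)	((((x)+((y)-1))/(y))*(y))
#define sk_roundup2(x, y)	(((x)+((y)-1))&(~((y)-1)))
#define sk_powerof2(x)		((((x)-1)&(x))==0)
#define sk_setbit(a, i)		((a)[(i)/SK_NBBY] |= 1<<((i)%SK_NBBY))
#define sk_isset(a, i)		((a)[(i)/SK_NBBY] & (1<<((i)%SK_NBBY)))

int
main(void)
{
	char map[4];

	memset(map, 0, sizeof(map));
	assert(sk_howmany(1000, 512) == 2);
	assert(sk_roundup(1000, 300) == 1200);	/* any y */
	assert(sk_powerof2(512));
	assert(sk_roundup2(1000, 512) == sk_roundup(1000, 512));
	sk_setbit(map, 13);
	assert(sk_isset(map, 13) && !sk_isset(map, 14));
	return (0);
}

/*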
* * Constraints: CLBYTES <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and * MAXALLOCSAVE must be a power of two. */ #define MINBUCKET 4 /* 4 => min allocation of 16 bytes */ #define MAXALLOCSAVE (2 * CLBYTES) /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. */ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1<<FSHIFT) #define SHM_RDONLY 010000 /* Attach read-only (else read-write) */ #define SHM_RND 020000 /* Round attach address to SHMLBA */ #define SHMLBA CLBYTES /* Segment low boundary address multiple */ /* "official" access mode definitions; somewhat braindead since you have to specify (SHM_* >> 3) for group and (SHM_* >> 6) for world permissions */ #define SHM_R (IPC_R) #define SHM_W (IPC_W) struct shmid_ds { struct ipc_perm shm_perm; /* operation permission structure */ int shm_segsz; /* size of segment in bytes */ pid_t shm_lpid; /* process ID of last shared memory op */ pid_t shm_cpid; /* process ID of creator */ short shm_nattch; /* number of current attaches */ time_t shm_atime; /* time of last shmat() */ time_t shm_dtime; /* time of last shmdt() */ time_t shm_ctime; /* time of last change by shmctl() */ void *shm_internal; /* sysv stupidity */ }; #ifdef KERNEL /* * System 5 style catch-all structure for shared memory constants that * might be of interest to user programs. Do we really want/need this? */ struct shminfo { int shmmax, /* max shared memory segment size (bytes) */ shmmin, /* min shared memory segment size (bytes) */ shmmni, /* max number of shared memory identifiers */ shmseg, /* max shared memory segments per process */ shmall; /* max amount of shared memory (pages) */ }; extern struct shminfo shminfo; extern struct shmid_ds *shmsegs; void shmexit __P((struct proc *)); -void shmfork __P((struct proc *, struct proc *, int)); +void shmfork __P((struct proc *, struct proc *)); #else /* !KERNEL */ #include __BEGIN_DECLS int shmsys __P((int, ...)); void *shmat __P((int, void *, int)); int shmget __P((key_t, int, int)); int shmctl __P((int, int, struct shmid_ds *)); int shmdt __P((void *)); __END_DECLS #endif /* !KERNEL */ #endif /* !_SYS_SHM_H_ */ diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index c6b30678558d..b0d748036460 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -1,111 +1,111 @@ /*- * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94 - * $Id: vm_extern.h,v 1.22 1995/12/14 09:54:55 phk Exp $ + * $Id: vm_extern.h,v 1.23 1996/02/04 22:08:57 dyson Exp $ */ #ifndef _VM_EXTERN_H_ #define _VM_EXTERN_H_ struct buf; struct proc; struct vmspace; struct vmtotal; struct mount; struct vnode; #ifdef KGDB void chgkprot __P((caddr_t, int, int)); #endif #ifdef KERNEL extern int indent; #ifdef TYPEDEF_FOR_UAP int getpagesize __P((struct proc * p, void *, int *)); int madvise __P((struct proc *, void *, int *)); int mincore __P((struct proc *, void *, int *)); int mprotect __P((struct proc *, void *, int *)); int msync __P((struct proc *, void *, int *)); int munmap __P((struct proc *, void *, int *)); int obreak __P((struct proc *, void *, int *)); int sbrk __P((struct proc *, void *, int *)); int smmap __P((struct proc *, void *, int *)); int sstk __P((struct proc *, void *, int *)); int swapon __P((struct proc *, void *, int *)); #endif void faultin __P((struct proc *p)); int grow __P((struct proc *, u_int)); void iprintf __P((const char *,...)); int kernacc __P((caddr_t, int, int)); vm_offset_t kmem_alloc __P((vm_map_t, vm_size_t)); vm_offset_t kmem_alloc_pageable __P((vm_map_t, vm_size_t)); vm_offset_t kmem_alloc_wait __P((vm_map_t, vm_size_t)); void kmem_free __P((vm_map_t, vm_offset_t, vm_size_t)); void kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t)); void kmem_init __P((vm_offset_t, vm_offset_t)); vm_offset_t kmem_malloc __P((vm_map_t, vm_size_t, boolean_t)); vm_map_t kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t, boolean_t)); void munmapfd __P((struct proc *, int)); int pager_cache __P((vm_object_t, boolean_t)); int swaponvp __P((struct proc *, struct vnode *, dev_t , u_long)); void swapout_procs __P((void)); int useracc __P((caddr_t, int, int)); int vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t)); void vm_fault_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); void vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t)); int vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t)); -int vm_fork __P((struct proc *, struct proc *, int)); +int vm_fork __P((struct proc *, struct proc *)); void vm_map_print __P((/* db_expr_t */ int, boolean_t, /* db_expr_t */ int, char *)); int vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, caddr_t, 
vm_ooffset_t)); vm_offset_t vm_page_alloc_contig __P((vm_offset_t, vm_offset_t, vm_offset_t, vm_offset_t)); void vm_set_page_size __P((void)); void vmmeter __P((void)); struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t, int)); struct vmspace *vmspace_fork __P((struct vmspace *)); void vmspace_free __P((struct vmspace *)); void vnode_pager_setsize __P((struct vnode *, vm_ooffset_t)); void vnode_pager_umount __P((struct mount *)); void vnode_pager_uncache __P((struct vnode *)); void vslock __P((caddr_t, u_int)); void vsunlock __P((caddr_t, u_int, int)); void vm_object_print __P((/* db_expr_t */ int, boolean_t, /* db_expr_t */ int, char *)); void vm_fault_quick __P((caddr_t v, int prot)); #endif /* KERNEL */ #endif /* !_VM_EXTERN_H_ */ diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 1d45e37f7127..3dfa281512e6 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,636 +1,632 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
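 */

/*
 * A standalone illustration of the __P() prototype idiom used in the
 * header above: with an ANSI compiler, __P((args)) expands to (args),
 * while a K&R compiler gets an empty parameter list.  This restates the
 * <sys/cdefs.h> convention in sketch form; the SK__P and sk_vm_fork
 * names are hypothetical, with sk_vm_fork standing in for the
 * two-argument vm_fork() this change introduces.
 */
#if defined(__STDC__)
#define SK__P(protos)	protos		/* ANSI: full prototype */
#else
#define SK__P(protos)	()		/* K&R: empty parameter list */
#endif

int sk_vm_fork SK__P((int parent, int child));

int
sk_vm_fork(int parent, int child)
{
	return (parent + child);	/* placeholder body */
}

int
main(void)
{
	return (sk_vm_fork(0, 0));
}

/*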
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_glue.c,v 1.38 1996/01/29 12:10:30 davidg Exp $ + * $Id: vm_glue.c,v 1.39 1996/02/22 10:57:36 davidg Exp $ */ -#include "opt_sysvipc.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * System initialization * * Note: proc0 from proc.h */ static void vm_init_limits __P((void *)); SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0) /* * THIS MUST BE THE LAST INITIALIZATION ITEM!!! * * Note: run scheduling should be divorced from the vm system. */ static void scheduler __P((void *)); SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL) static void swapout __P((struct proc *)); extern char kstack[]; /* vm_map_t upages_map; */ int kernacc(addr, len, rw) caddr_t addr; int len, rw; { boolean_t rv; vm_offset_t saddr, eaddr; vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; saddr = trunc_page(addr); eaddr = round_page(addr + len); rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); return (rv == TRUE); } int useracc(addr, len, rw) caddr_t addr; int len, rw; { boolean_t rv; vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; /* * XXX - check separately to disallow access to user area and user * page tables - they are in the map. * * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was once * only used (as an end address) in trap.c. Use it as an end address * here too. This bogusness has spread. I just fixed where it was * used as a max in vm_mmap.c. */ if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS || (vm_offset_t) addr + len < (vm_offset_t) addr) { return (FALSE); } rv = vm_map_check_protection(&curproc->p_vmspace->vm_map, trunc_page(addr), round_page(addr + len), prot); return (rv == TRUE); } #ifdef KGDB /* * Change protections on kernel pages from addr to addr+len * (presumably so debugger can plant a breakpoint). * All addresses are assumed to reside in the Sysmap, */ chgkprot(addr, len, rw) register caddr_t addr; int len, rw; { vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; vm_map_protect(kernel_map, trunc_page(addr), round_page(addr + len), prot, FALSE); } #endif void vslock(addr, len) caddr_t addr; u_int len; { vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), round_page(addr + len), FALSE); } void vsunlock(addr, len, dirtied) caddr_t addr; u_int len; int dirtied; { #ifdef lint dirtied++; #endif /* lint */ vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr), round_page(addr + len), TRUE); } /* * Implement fork's actions on an address space. * Here we arrange for the address space to be copied or referenced, * allocate a user struct (pcb and kernel stack), then call the * machine-dependent layer to fill those in and make the new process * ready to run. * NOTE: the kernel stack may be at a different location in the child * process, and thus addresses of automatic variables may be invalid * after cpu_fork returns in the child process. We do nothing here * after cpu_fork returns. 
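 */

/*
 * The overflow guard useracc() relies on above, as a standalone,
 * compilable sketch: with unsigned arithmetic, a range that wraps past
 * the top of the address space makes addr + len compare smaller than
 * addr.  sk_range_wraps() is a hypothetical name; this is not the
 * kernel routine itself.
 */
#include <assert.h>
#include <limits.h>

static int
sk_range_wraps(unsigned long addr, unsigned long len)
{
	return (addr + len < addr);	/* true only on unsigned wrap */
}

int
main(void)
{
	assert(!sk_range_wraps(0x1000UL, 0x100UL));
	assert(sk_range_wraps(ULONG_MAX - 10, 100UL));
	return (0);
}

/*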
*/ int -vm_fork(p1, p2, isvfork) +vm_fork(p1, p2) register struct proc *p1, *p2; - int isvfork; { register struct user *up; vm_offset_t addr, ptaddr, ptpa; int error, i; vm_map_t map; pmap_t pvp; vm_page_t stkm; while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { VM_WAIT; } /* * avoid copying any of the parent's pagetables or other per-process * objects that reside in the map by marking all of them * non-inheritable */ (void) vm_map_inherit(&p1->p_vmspace->vm_map, UPT_MIN_ADDRESS - UPAGES * PAGE_SIZE, VM_MAX_ADDRESS, VM_INHERIT_NONE); p2->p_vmspace = vmspace_fork(p1->p_vmspace); -#ifdef SYSVSHM if (p1->p_vmspace->vm_shm) - shmfork(p1, p2, isvfork); -#endif + shmfork(p1, p2); /* * Allocate a wired-down (for now) pcb and kernel stack for the * process */ addr = (vm_offset_t) kstack; map = &p2->p_vmspace->vm_map; pvp = &p2->p_vmspace->vm_pmap; /* get new pagetables and kernel stack */ error = vm_map_find(map, NULL, 0, &addr, UPT_MAX_ADDRESS - addr, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) panic("vm_fork: vm_map_find failed, addr=0x%x, error=%d", addr, error); /* get a kernel virtual address for the UPAGES for this proc */ up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("vm_fork: u_map allocation failed"); p2->p_vmspace->vm_upages_obj = vm_object_allocate( OBJT_DEFAULT, UPAGES); ptaddr = trunc_page((u_int) vtopte(kstack)); (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); ptpa = pmap_extract(pvp, ptaddr); if (ptpa == 0) { panic("vm_fork: no pte for UPAGES"); } stkm = PHYS_TO_VM_PAGE(ptpa); vm_page_hold(stkm); for(i=0;i<UPAGES;i++) { vm_page_t m; while ((m = vm_page_alloc(p2->p_vmspace->vm_upages_obj, i, VM_ALLOC_ZERO)) == NULL) { VM_WAIT; } vm_page_wire(m); m->flags &= ~PG_BUSY; pmap_enter( pvp, (vm_offset_t) kstack + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, 1); pmap_kenter(((vm_offset_t) up) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if ((m->flags & PG_ZERO) == 0) bzero(((caddr_t) up) + i * PAGE_SIZE, PAGE_SIZE); m->flags &= ~PG_ZERO; m->valid = VM_PAGE_BITS_ALL; } vm_page_unhold(stkm); p2->p_addr = up; /* * p_stats and p_sigacts currently point at fields in the user struct * but not at &u, instead at p_addr. Copy p_sigacts and parts of * p_stats; zero the rest of p_stats (statistics). */ p2->p_stats = &up->u_stats; p2->p_sigacts = &up->u_sigacts; up->u_sigacts = *p1->p_sigacts; bzero(&up->u_stats.pstat_startzero, (unsigned) ((caddr_t) &up->u_stats.pstat_endzero - (caddr_t) &up->u_stats.pstat_startzero)); bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, ((caddr_t) &up->u_stats.pstat_endcopy - (caddr_t) &up->u_stats.pstat_startcopy)); /* * cpu_fork will copy and update the kernel stack and pcb, and make * the child ready to run. It marks the child so that it can return * differently than the parent. It returns twice, once in the parent * process and once in the child. */ return (cpu_fork(p1, p2)); } /* * Set default limits for VM system. * Called for proc 0, and then inherited by all others. * * XXX should probably act directly on proc0. */ static void vm_init_limits(udata) void *udata; { register struct proc *p = udata; int rss_limit; /* * Set up the initial limits on process VM. Set the maximum resident * set size to be half of (reasonably) available memory. Since this * is a soft limit, it comes into effect only when the system is out * of memory - half of main memory helps to favor smaller processes, * and reduces thrashing of the object cache.
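 */

/*
 * A userland sketch of the marker idiom vm_fork() uses just above for
 * p_stats: fields between the "startzero"/"endzero" markers are wiped
 * in the child, and fields from the "startcopy" marker on are inherited
 * from the parent.  The struct and field names here are hypothetical;
 * the kernel marks the real spans inside struct pstats.
 */
#include <assert.h>
#include <string.h>

struct sk_stats {
	/* zeroed span: per-process counters start here */
	int	nfaults;
	int	nswaps;
	/* copied span: inherited settings start here */
	long	start_sec;
};

static void
sk_fork_stats(struct sk_stats *child, const struct sk_stats *parent)
{
	memset(&child->nfaults, 0,
	    (char *)&child->start_sec - (char *)&child->nfaults);
	memcpy(&child->start_sec, &parent->start_sec,
	    sizeof(*child) - ((char *)&child->start_sec - (char *)child));
}

int
main(void)
{
	struct sk_stats p, c;

	p.nfaults = 3; p.nswaps = 1; p.start_sec = 42;
	memset(&c, 0xff, sizeof(c));
	sk_fork_stats(&c, &p);
	assert(c.nfaults == 0 && c.nswaps == 0 && c.start_sec == 42);
	return (0);
}

/*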
*/ p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; /* limit the limit to no less than 2MB */ rss_limit = max(cnt.v_free_count, 512); p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit); p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY; } void faultin(p) struct proc *p; { vm_offset_t i; vm_offset_t ptaddr; int s; if ((p->p_flag & P_INMEM) == 0) { vm_map_t map = &p->p_vmspace->vm_map; pmap_t pmap = &p->p_vmspace->vm_pmap; vm_page_t stkm, m; vm_offset_t ptpa; int error; ++p->p_lock; ptaddr = trunc_page((u_int) vtopte(kstack)); (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); ptpa = pmap_extract(&p->p_vmspace->vm_pmap, ptaddr); if (ptpa == 0) { panic("vm_fork: no pte for UPAGES"); } stkm = PHYS_TO_VM_PAGE(ptpa); vm_page_hold(stkm); for(i=0;i<UPAGES;i++) { retry: s = splhigh(); if ((m = vm_page_lookup(p->p_vmspace->vm_upages_obj, i)) == NULL) { if ((m = vm_page_alloc(p->p_vmspace->vm_upages_obj, i, VM_ALLOC_NORMAL)) == NULL) { VM_WAIT; goto retry; } } else { if ((m->flags & PG_BUSY) || m->busy) { m->flags |= PG_WANTED; tsleep(m, PVM, "swinuw",0); goto retry; } } vm_page_wire(m); if (m->valid == VM_PAGE_BITS_ALL) m->flags &= ~PG_BUSY; splx(s); pmap_enter( pmap, (vm_offset_t) kstack + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, TRUE); pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); if (m->valid != VM_PAGE_BITS_ALL) { int rv; rv = vm_pager_get_pages(p->p_vmspace->vm_upages_obj, &m, 1, 0); if (rv != VM_PAGER_OK) panic("faultin: cannot get upages for proc: %d\n", p->p_pid); m->valid = VM_PAGE_BITS_ALL; m->flags &= ~PG_BUSY; } } vm_page_unhold(stkm); s = splhigh(); if (p->p_stat == SRUN) setrunqueue(p); p->p_flag |= P_INMEM; /* undo the effect of setting SLOCK above */ --p->p_lock; splx(s); } } /* * This swapin algorithm attempts to swap-in processes only if there * is enough space for them. Of course, if a process waits for a long * time, it will be swapped in anyway. */ /* ARGSUSED*/ static void scheduler(dummy) void *dummy; { register struct proc *p; register int pri; struct proc *pp; int ppri; loop: while ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_reserved + UPAGES + 2)) { VM_WAIT; } pp = NULL; ppri = INT_MIN; for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { if (p->p_stat == SRUN && (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) { int mempri; pri = p->p_swtime + p->p_slptime - p->p_nice * 8; mempri = pri > 0 ? pri : 0; /* * if this process is higher priority and there is * enough space, then select this process instead of * the previous selection. */ if (pri > ppri) { pp = p; ppri = pri; } } } /* * Nothing to do, back to sleep */ if ((p = pp) == NULL) { tsleep(&proc0, PVM, "sched", 0); goto loop; } /* * We would like to bring someone in. (only if there is space). */ faultin(p); p->p_swtime = 0; goto loop; } #ifndef NO_SWAPPING #define swappable(p) \ (((p)->p_lock == 0) && \ ((p)->p_flag & (P_TRACED|P_NOSWAP|P_SYSTEM|P_INMEM|P_WEXIT|P_PHYSIO|P_SWAPPING)) == P_INMEM) /* * Swapout is driven by the pageout daemon. Very simple, we find eligible * procs and unwire their u-areas. We try to always "swap" at least one * process in case we need the room for a swapin. * If any procs have been sleeping/stopped for at least maxslp seconds, * they are swapped. Else, we swap the longest-sleeping or stopped process, * if any, otherwise the longest-resident process.
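 */

/*
 * The selection rule scheduler() applies above, restated as a
 * standalone function: among runnable but swapped-out processes, pick
 * the one with the largest swtime + slptime - nice * 8, so long-swapped
 * and long-sleeping processes come in first while nicer processes
 * wait.  struct sk_proc is a hypothetical stand-in for struct proc.
 */
#include <assert.h>

struct sk_proc {
	int swtime;		/* seconds swapped out */
	int slptime;		/* seconds asleep */
	int nice;		/* user niceness */
};

static int
sk_swapin_pri(const struct sk_proc *p)
{
	return (p->swtime + p->slptime - p->nice * 8);
}

int
main(void)
{
	struct sk_proc a = { 10, 5, 0 };
	struct sk_proc b = { 10, 5, 20 };	/* nicer, so lower priority */

	assert(sk_swapin_pri(&a) > sk_swapin_pri(&b));
	return (0);
}

/*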
*/ void swapout_procs() { register struct proc *p; struct proc *outp, *outp2; int outpri, outpri2; int didswap = 0; outp = outp2 = NULL; outpri = outpri2 = INT_MIN; retry: for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { if (!swappable(p)) continue; switch (p->p_stat) { default: continue; case SSLEEP: case SSTOP: /* * do not swapout a realtime process */ if (p->p_rtprio.type == RTP_PRIO_REALTIME) continue; /* * do not swapout a process waiting on a critical * event of some kind */ if (((p->p_priority & 0x7f) < PSOCK) || (p->p_slptime <= 4)) continue; vm_map_reference(&p->p_vmspace->vm_map); /* * do not swapout a process that is waiting for VM * datastructures; there is a possible deadlock. */ if (!lock_try_write(&p->p_vmspace->vm_map.lock)) { vm_map_deallocate(&p->p_vmspace->vm_map); continue; } vm_map_unlock(&p->p_vmspace->vm_map); /* * If the process has been asleep for awhile and had * most of its pages taken away already, swap it out. */ swapout(p); vm_map_deallocate(&p->p_vmspace->vm_map); didswap++; goto retry; } } /* * If we swapped something out, and another process needed memory, * then wakeup the sched process. */ if (didswap) wakeup(&proc0); } static void swapout(p) register struct proc *p; { vm_map_t map = &p->p_vmspace->vm_map; pmap_t pmap = &p->p_vmspace->vm_pmap; vm_offset_t ptaddr; int i; ++p->p_stats->p_ru.ru_nswap; /* * remember the process resident count */ p->p_vmspace->vm_swrss = p->p_vmspace->vm_pmap.pm_stats.resident_count; (void) splhigh(); p->p_flag &= ~P_INMEM; p->p_flag |= P_SWAPPING; if (p->p_stat == SRUN) remrq(p); (void) spl0(); /* * let the upages be paged */ for(i=0;i<UPAGES;i++) { vm_page_t m; if ((m = vm_page_lookup(p->p_vmspace->vm_upages_obj, i)) == NULL) panic("swapout: upage already missing???"); m->dirty = VM_PAGE_BITS_ALL; vm_page_unwire(m); pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i); } pmap_remove(pmap, (vm_offset_t) kstack, (vm_offset_t) kstack + PAGE_SIZE * UPAGES); p->p_flag &= ~P_SWAPPING; p->p_swtime = 0; } #endif /* !NO_SWAPPING */ #ifdef DDB /* * DEBUG stuff */ int indent; #include /* see subr_prf.c */ /*ARGSUSED2*/ void #if __STDC__ iprintf(const char *fmt,...) #else iprintf(fmt /* , va_alist */ ) char *fmt; /* va_dcl */ #endif { register int i; va_list ap; for (i = indent; i >= 8; i -= 8) printf("\t"); while (--i >= 0) printf(" "); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } #endif /* DDB */ diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index c68f5f454836..d7d597d7a670 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1,739 +1,832 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.34 1995/12/17 07:19:57 bde Exp $ + * $Id: vm_mmap.c,v 1.35 1996/01/19 03:59:59 dyson Exp $ */ /* * Mapped file (mmap) interface to VM */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; }; #endif /* ARGSUSED */ int sbrk(p, uap, retval) struct proc *p; struct sbrk_args *uap; int *retval; { /* Not yet implemented */ return (EOPNOTSUPP); } #ifndef _SYS_SYSPROTO_H_ struct sstk_args { int incr; }; #endif /* ARGSUSED */ int sstk(p, uap, retval) struct proc *p; struct sstk_args *uap; int *retval; { /* Not yet implemented */ return (EOPNOTSUPP); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct getpagesize_args { int dummy; }; #endif /* ARGSUSED */ int ogetpagesize(p, uap, retval) struct proc *p; struct getpagesize_args *uap; int *retval; { *retval = PAGE_SIZE; return (0); } #endif /* COMPAT_43 || COMPAT_SUNOS */ #ifndef _SYS_SYSPROTO_H_ struct mmap_args { caddr_t addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; }; #endif int mmap(p, uap, retval) struct proc *p; register struct mmap_args *uap; int *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; vm_offset_t addr; vm_size_t size; vm_prot_t prot, maxprot; caddr_t handle; int flags, error; prot = uap->prot & VM_PROT_ALL; flags = uap->flags; /* * Address (if FIXED) must be page aligned. Size is implicitly rounded * to a page boundary. */ addr = (vm_offset_t) uap->addr; if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) || (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) return (EINVAL); size = (vm_size_t) round_page(uap->len); /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) return (EINVAL); #endif if (addr + size < addr) return (EINVAL); } /* * XXX if no hint provided for a non-fixed mapping place it after the * end of the largest possible heap. * * There should really be a pmap call to determine a reasonable location. 
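 */

/*
 * A standalone sketch of the MAP_FIXED argument screening mmap()
 * performs above: a fixed address must be page aligned, the range must
 * stay below the user address limit, and addr + size must not wrap.
 * The SK_* limits are illustrative placeholders, not the kernel's
 * actual values.
 */
#include <assert.h>

#define SK_PAGE_MASK		0xfffUL
#define SK_MAXUSER_ADDRESS	0xefbfe000UL	/* placeholder limit */

static int
sk_mmap_fixed_ok(unsigned long addr, unsigned long size)
{
	if (addr & SK_PAGE_MASK)
		return (0);			/* unaligned */
	if (addr + size > SK_MAXUSER_ADDRESS)
		return (0);			/* beyond user space */
	if (addr + size < addr)
		return (0);			/* wrapped */
	return (1);
}

int
main(void)
{
	assert(!sk_mmap_fixed_ok(0x1234UL, 0x1000UL));
	assert(sk_mmap_fixed_ok(0x10000UL, 0x1000UL));
	return (0);
}

/*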
*/ if (addr == 0 && (flags & MAP_FIXED) == 0) addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); if (flags & MAP_ANON) { /* * Mapping blank space is trivial. */ handle = NULL; maxprot = VM_PROT_ALL; } else { /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. */ if (((unsigned) uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EINVAL); vp = (struct vnode *) fp->f_data; if (vp->v_type != VREG && vp->v_type != VCHR) return (EINVAL); /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { handle = NULL; maxprot = VM_PROT_ALL; flags |= MAP_ANON; } else { /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_flag & FREAD) maxprot |= VM_PROT_READ; else if (prot & PROT_READ) return (EACCES); if (flags & MAP_SHARED) { if (fp->f_flag & FWRITE) maxprot |= VM_PROT_WRITE; else if (prot & PROT_WRITE) return (EACCES); } else maxprot |= VM_PROT_WRITE; handle = (caddr_t) vp; } } error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, uap->pos); if (error == 0) *retval = (int) addr; return (error); } #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct ommap_args { caddr_t addr; int len; int prot; int flags; int fd; long pos; }; #endif int ommap(p, uap, retval) struct proc *p; register struct ommap_args *uap; int *retval; { struct mmap_args nargs; static const char cvtbsdprot[8] = { 0, PROT_EXEC, PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_READ, PROT_EXEC | PROT_READ, PROT_WRITE | PROT_READ, PROT_EXEC | PROT_WRITE | PROT_READ, }; #define OMAP_ANON 0x0002 #define OMAP_COPY 0x0020 #define OMAP_SHARED 0x0010 #define OMAP_FIXED 0x0100 #define OMAP_INHERIT 0x0800 nargs.addr = uap->addr; nargs.len = uap->len; nargs.prot = cvtbsdprot[uap->prot & 0x7]; nargs.flags = 0; if (uap->flags & OMAP_ANON) nargs.flags |= MAP_ANON; if (uap->flags & OMAP_COPY) nargs.flags |= MAP_COPY; if (uap->flags & OMAP_SHARED) nargs.flags |= MAP_SHARED; else nargs.flags |= MAP_PRIVATE; if (uap->flags & OMAP_FIXED) nargs.flags |= MAP_FIXED; if (uap->flags & OMAP_INHERIT) nargs.flags |= MAP_INHERIT; nargs.fd = uap->fd; nargs.pos = uap->pos; return (mmap(p, &nargs, retval)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct msync_args { caddr_t addr; int len; int flags; }; #endif int msync(p, uap, retval) struct proc *p; struct msync_args *uap; int *retval; { vm_offset_t addr; - vm_size_t size; + vm_size_t size, pageoff; int flags; vm_map_t map; int rv; map = &p->p_vmspace->vm_map; addr = (vm_offset_t) uap->addr; size = round_page((vm_size_t) uap->len); flags = uap->flags; - if (((int) addr & PAGE_MASK) || addr + size < addr || - (flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) + /* + * Align the address to a page boundary, + * and adjust the size accordingly. + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t) round_page(size); + if ((int)size < 0) + return(EINVAL); + + if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) return (EINVAL); /* * XXX Gak! 
If size is zero we are supposed to sync "all modified * pages with the region containing addr". Unfortunately, we don't * really keep track of individual mmaps so we approximate by flushing * the range of the map entry containing addr. This can be incorrect * if the region splits or is coalesced with a neighbor. */ if (size == 0) { vm_map_entry_t entry; vm_map_lock_read(map); rv = vm_map_lookup_entry(map, addr, &entry); vm_map_unlock_read(map); if (rv == FALSE) return (EINVAL); addr = entry->start; size = entry->end - entry->start; } /* * Clean the pages and interpret the return value. */ rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); switch (rv) { case KERN_SUCCESS: break; case KERN_INVALID_ADDRESS: return (EINVAL); /* Sun returns ENOMEM? */ case KERN_FAILURE: return (EIO); default: return (EINVAL); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct munmap_args { caddr_t addr; int len; }; #endif int munmap(p, uap, retval) register struct proc *p; register struct munmap_args *uap; int *retval; { vm_offset_t addr; - vm_size_t size; + vm_size_t size, pageoff; vm_map_t map; addr = (vm_offset_t) uap->addr; - if ((addr & PAGE_MASK) || uap->len < 0) - return (EINVAL); - size = (vm_size_t) round_page(uap->len); + size = (vm_size_t) uap->len; + + /* + * Align the address to a page boundary, + * and adjust the size accordingly. + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t) round_page(size); + if ((int)size < 0) + return(EINVAL); if (size == 0) return (0); + /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) return (EINVAL); #endif if (addr + size < addr) return (EINVAL); map = &p->p_vmspace->vm_map; /* * Make sure entire range is allocated. */ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) return (EINVAL); /* returns nothing but KERN_SUCCESS anyway */ (void) vm_map_remove(map, addr, addr + size); return (0); } void munmapfd(p, fd) struct proc *p; int fd; { /* * XXX should unmap any regions mapped to this file */ p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; } #ifndef _SYS_SYSPROTO_H_ struct mprotect_args { caddr_t addr; int len; int prot; }; #endif int mprotect(p, uap, retval) struct proc *p; struct mprotect_args *uap; int *retval; { vm_offset_t addr; - vm_size_t size; + vm_size_t size, pageoff; register vm_prot_t prot; addr = (vm_offset_t) uap->addr; - if ((addr & PAGE_MASK) || uap->len < 0) - return (EINVAL); size = (vm_size_t) uap->len; prot = uap->prot & VM_PROT_ALL; + /* + * Align the address to a page boundary, + * and adjust the size accordingly. 
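 */

/*
 * The alignment idiom this revision introduces in msync(), munmap(),
 * mprotect(), minherit(), mlock() and munlock(), as a standalone
 * helper: slide the address down to a page boundary, grow the size by
 * the slack, then round the size up to whole pages.  The page size here
 * is a stand-in value.
 */
#include <assert.h>

#define SK_PAGE_SIZE	4096UL
#define SK_PAGE_MASK	(SK_PAGE_SIZE - 1)

static void
sk_page_align(unsigned long *addr, unsigned long *size)
{
	unsigned long pageoff = *addr & SK_PAGE_MASK;

	*addr -= pageoff;		/* back up to the page start */
	*size += pageoff;		/* still cover the original end */
	*size = (*size + SK_PAGE_MASK) & ~SK_PAGE_MASK;
}

int
main(void)
{
	unsigned long addr = 0x1234UL, size = 100UL;

	sk_page_align(&addr, &size);
	assert(addr == 0x1000UL && size == 0x1000UL);
	return (0);
}

/*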
+ */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t) round_page(size); + if ((int)size < 0) + return(EINVAL); + switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); } return (EINVAL); } +#ifndef _SYS_SYSPROTO_H_ +struct minherit_args { + caddr_t addr; + int len; + int inherit; +}; +#endif +int +minherit(p, uap, retval) + struct proc *p; + struct minherit_args *uap; + int *retval; +{ + vm_offset_t addr; + vm_size_t size, pageoff; + register vm_inherit_t inherit; + + addr = (vm_offset_t)uap->addr; + size = (vm_size_t)uap->len; + inherit = uap->inherit; + + /* + * Align the address to a page boundary, + * and adjust the size accordingly. + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t) round_page(size); + if ((int)size < 0) + return(EINVAL); + + switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, + inherit)) { + case KERN_SUCCESS: + return (0); + case KERN_PROTECTION_FAILURE: + return (EACCES); + } + return (EINVAL); +} + #ifndef _SYS_SYSPROTO_H_ struct madvise_args { caddr_t addr; int len; int behav; }; #endif /* ARGSUSED */ int madvise(p, uap, retval) struct proc *p; struct madvise_args *uap; int *retval; { /* Not yet implemented */ return (EOPNOTSUPP); } #ifndef _SYS_SYSPROTO_H_ struct mincore_args { caddr_t addr; int len; char *vec; }; #endif /* ARGSUSED */ int mincore(p, uap, retval) struct proc *p; struct mincore_args *uap; int *retval; { vm_offset_t addr; vm_offset_t end; char *vec; addr = trunc_page((vm_offset_t) uap->addr); end = addr + round_page((vm_size_t) uap->len); if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS) return (EINVAL); if (end < addr) return (EINVAL); vec = uap->vec; while(addr < end) { int error; if (pmap_extract(&p->p_vmspace->vm_pmap, addr)) { error = subyte( vec, 1); } else { error = subyte( vec, 0); } if (error) return EFAULT; vec++; addr += PAGE_SIZE; } return (0); } #ifndef _SYS_SYSPROTO_H_ struct mlock_args { caddr_t addr; size_t len; }; #endif int mlock(p, uap, retval) struct proc *p; struct mlock_args *uap; int *retval; { vm_offset_t addr; - vm_size_t size; + vm_size_t size, pageoff; int error; addr = (vm_offset_t) uap->addr; - if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) + size = (vm_size_t) uap->len; + /* + * Align the address to a page boundary, + * and adjust the size accordingly. + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t) round_page(size); + + /* disable wrap around */ + if (addr + (int)size < addr) return (EINVAL); - size = round_page((vm_size_t) uap->len); + if (atop(size) + cnt.v_wire_count > vm_page_max_wired) return (EAGAIN); #ifdef pmap_wired_count if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) return (EAGAIN); #else error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); #endif error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE); return (error == KERN_SUCCESS ? 
0 : ENOMEM); } #ifndef _SYS_SYSPROTO_H_ struct munlock_args { caddr_t addr; size_t len; }; #endif int munlock(p, uap, retval) struct proc *p; struct munlock_args *uap; int *retval; { vm_offset_t addr; - vm_size_t size; + vm_size_t size, pageoff; int error; addr = (vm_offset_t) uap->addr; - if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr) + size = (vm_size_t) uap->len; + /* + * Align the address to a page boundary, + * and adjust the size accordingly. + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t) round_page(size); + + /* disable wrap around */ + if (addr + (int)size < addr) return (EINVAL); + #ifndef pmap_wired_count error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); #endif - size = round_page((vm_size_t) uap->len); error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE); return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* * Internal version of mmap. * Currently used by mmap, exec, and sys5 shared memory. * Handle is either a vnode pointer or NULL for MAP_ANON. */ int vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) register vm_map_t map; register vm_offset_t *addr; register vm_size_t size; vm_prot_t prot, maxprot; register int flags; caddr_t handle; /* XXX should be vp */ vm_ooffset_t foff; { boolean_t fitit; vm_object_t object, object2; struct vnode *vp = NULL; objtype_t type; int rv = KERN_SUCCESS; vm_ooffset_t objsize; int docow; struct proc *p = curproc; if (size == 0) return (0); objsize = size = round_page(size); /* * We currently can only deal with page aligned file offsets. * The check is here rather than in the syscall because the * kernel calls this function internally for other mmaping * operations (such as in exec) and non-aligned offsets will * cause pmap inconsistencies...so we want to be sure to * disallow this in all cases. */ if (foff & PAGE_MASK) return (EINVAL); if ((flags & MAP_FIXED) == 0) { fitit = TRUE; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) return (EINVAL); fitit = FALSE; (void) vm_map_remove(map, *addr, *addr + size); } /* * Lookup/allocate object. */ if (flags & MAP_ANON) { type = OBJT_SWAP; /* * Unnamed anonymous regions always start at 0. */ if (handle == 0) foff = 0; } else { vp = (struct vnode *) handle; if (vp->v_type == VCHR) { type = OBJT_DEVICE; handle = (caddr_t) vp->v_rdev; } else { struct vattr vat; int error; error = VOP_GETATTR(vp, &vat, p->p_ucred, p); if (error) return (error); objsize = round_page(vat.va_size); type = OBJT_VNODE; } } object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff); if (object == NULL) return (type == OBJT_DEVICE ? EINVAL : ENOMEM); object2 = NULL; docow = 0; if ((flags & (MAP_ANON|MAP_SHARED)) == 0 && (type != OBJT_DEVICE)) { docow = MAP_COPY_ON_WRITE; if (objsize < size) { object2 = vm_object_allocate( OBJT_DEFAULT, OFF_TO_IDX(size - (foff & ~(PAGE_SIZE - 1)))); object2->backing_object = object; object2->backing_object_offset = foff; TAILQ_INSERT_TAIL(&object->shadow_head, object2, shadow_list); } else { docow |= MAP_COPY_NEEDED; } } if (object2) rv = vm_map_find(map, object2, 0, addr, size, fitit, prot, maxprot, docow); else rv = vm_map_find(map, object, foff, addr, size, fitit, prot, maxprot, docow); if (rv != KERN_SUCCESS) { /* * Lose the object reference. Will destroy the * object if it's an unnamed anonymous mapping * or named anonymous without other references. 
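 */

/*
 * vm_mmap()'s entry screening above, sketched as a standalone function:
 * file offsets must always be page aligned (internal callers such as
 * exec depend on this), and MAP_FIXED pins the address, which must
 * itself be aligned; otherwise the VM system is free to choose a spot
 * ("fitit").  Flag and mask values here are illustrative.
 */
#include <assert.h>

#define SK_PAGE_MASK	0xfffUL
#define SK_MAP_FIXED	0x10

static int				/* 0 == OK, -1 == EINVAL analogue */
sk_vm_mmap_checks(unsigned long addr, unsigned long foff, int flags,
    int *fitit)
{
	if (foff & SK_PAGE_MASK)
		return (-1);		/* unaligned file offset */
	if (flags & SK_MAP_FIXED) {
		if (addr & SK_PAGE_MASK)
			return (-1);	/* fixed address must be aligned */
		*fitit = 0;		/* use addr exactly */
	} else
		*fitit = 1;		/* let the VM pick the address */
	return (0);
}

int
main(void)
{
	int fitit;

	assert(sk_vm_mmap_checks(0x1000UL, 0x200UL, SK_MAP_FIXED, &fitit) == -1);
	assert(sk_vm_mmap_checks(0UL, 0UL, 0, &fitit) == 0 && fitit == 1);
	return (0);
}

/*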
*/ if (object2) vm_object_deallocate(object2); else vm_object_deallocate(object); goto out; } /* * "Pre-fault" resident pages. */ if ((map != kernel_map) && (type == OBJT_VNODE) && (map->pmap != NULL)) { pmap_object_init_pt(map->pmap, *addr, object, (vm_pindex_t) OFF_TO_IDX(foff), size); } /* * Shared memory is also shared with children. */ if (flags & MAP_SHARED) { rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE); if (rv != KERN_SUCCESS) { (void) vm_map_remove(map, *addr, *addr + size); goto out; } } out: switch (rv) { case KERN_SUCCESS: return (0); case KERN_INVALID_ADDRESS: case KERN_NO_SPACE: return (ENOMEM); case KERN_PROTECTION_FAILURE: return (EACCES); default: return (EINVAL); } }
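/*
 * The KERN_* to errno translation that closes vm_mmap() above (and that
 * mprotect() and minherit() repeat inline; msync() additionally maps
 * KERN_FAILURE to EIO), collected into one standalone table.  The
 * SK_KERN_* constants are placeholders for the kernel's KERN_* return
 * codes.
 */
#include <errno.h>

enum sk_kern_return {
	SK_KERN_SUCCESS,
	SK_KERN_INVALID_ADDRESS,
	SK_KERN_NO_SPACE,
	SK_KERN_PROTECTION_FAILURE,
	SK_KERN_FAILURE
};

static int
sk_vm_errno(enum sk_kern_return rv)
{
	switch (rv) {
	case SK_KERN_SUCCESS:
		return (0);
	case SK_KERN_INVALID_ADDRESS:
	case SK_KERN_NO_SPACE:
		return (ENOMEM);
	case SK_KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
main(void)
{
	return (sk_vm_errno(SK_KERN_SUCCESS));
}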