Index: head/sys/kern/kern_descrip.c =================================================================== --- head/sys/kern/kern_descrip.c (revision 41085) +++ head/sys/kern/kern_descrip.c (revision 41086) @@ -1,1208 +1,1310 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 - * $Id: kern_descrip.c,v 1.54 1998/07/15 06:10:16 bde Exp $ + * $Id: kern_descrip.c,v 1.55 1998/07/29 17:38:13 bde Exp $ */ #include "opt_compat.h" #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); MALLOC_DEFINE(M_FILE, "file", "Open file structure"); +static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); static d_open_t fdopen; #define NUMFDESC 64 #define CDEV_MAJOR 22 static struct cdevsw fildesc_cdevsw = { fdopen, noclose, noread, nowrite, noioc, nostop, nullreset, nodevtotty, seltrue, nommap, nostrat }; static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval)); /* * Descriptor management. */ struct filelist filehead; /* head of list of open files */ int nfiles; /* actual number of open files */ extern int cmask; /* * System calls on descriptors. 
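 *
 * [Illustrative sketch, not part of the committed source: a userland
 * view of the duplication primitives this section implements.  The
 * file name and helper are hypothetical; only POSIX calls are used.]
 */
#if 0	/* illustrative userland sketch */
#include <fcntl.h>
#include <unistd.h>

/*
 * Redirect stdout to a file, returning a saved copy of the old
 * stdout.  fcntl(F_DUPFD) allocates the lowest free descriptor at
 * or above its argument; dup2() re-points descriptor 1 at fd,
 * closing whatever descriptor 1 referred to before.
 */
int
redirect_stdout(const char *path)
{
	int fd, saved;

	fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0)
		return (-1);
	saved = fcntl(STDOUT_FILENO, F_DUPFD, 10);
	if (saved < 0 || dup2(fd, STDOUT_FILENO) < 0) {
		close(fd);
		return (-1);
	}
	close(fd);
	return (saved);		/* dup2(saved, 1) restores stdout */
}
#endif
/*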
*/ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int getdtablesize(p, uap) struct proc *p; struct getdtablesize_args *uap; { p->p_retval[0] = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); return (0); } /* * Duplicate a file descriptor to a particular value. */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int dup2(p, uap) struct proc *p; struct dup2_args *uap; { register struct filedesc *fdp = p->p_fd; register u_int old = uap->from, new = uap->to; int i, error; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfilesperproc) return (EBADF); if (old == new) { p->p_retval[0] = new; return (0); } if (new >= fdp->fd_nfiles) { if ((error = fdalloc(p, new, &i))) return (error); if (new != i) panic("dup2: fdalloc"); } else if (fdp->fd_ofiles[new]) { if (fdp->fd_ofileflags[new] & UF_MAPPED) (void) munmapfd(p, new); /* * dup2() must succeed even if the close has an error. */ (void) closef(fdp->fd_ofiles[new], p); } return (finishdup(fdp, (int)old, (int)new, p->p_retval)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int dup(p, uap) struct proc *p; struct dup_args *uap; { register struct filedesc *fdp; u_int old; int new, error; old = uap->fd; #if 0 /* * XXX Compatibility */ if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); } #endif fdp = p->p_fd; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) return (EBADF); if ((error = fdalloc(p, 0, &new))) return (error); return (finishdup(fdp, (int)old, new, p->p_retval)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; long arg; }; #endif /* ARGSUSED */ int fcntl(p, uap) struct proc *p; register struct fcntl_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register char *pop; struct vnode *vp; int i, tmp, error, flg = F_POSIX; struct flock fl; u_int newmin; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); pop = &fdp->fd_ofileflags[uap->fd]; switch (uap->cmd) { case F_DUPFD: newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfilesperproc) return (EINVAL); if ((error = fdalloc(p, newmin, &i))) return (error); return (finishdup(fdp, uap->fd, i, p->p_retval)); case F_GETFD: p->p_retval[0] = *pop & 1; return (0); case F_SETFD: *pop = (*pop &~ 1) | (uap->arg & 1); return (0); case F_GETFL: p->p_retval[0] = OFLAGS(fp->f_flag); return (0); case F_SETFL: fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); if (error) return (error); tmp = fp->f_flag & FASYNC; error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); if (!error) return (0); fp->f_flag &= ~FNONBLOCK; tmp = 0; (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); return (error); case F_GETOWN: - if (fp->f_type == DTYPE_SOCKET) { - p->p_retval[0] = ((struct socket *)fp->f_data)->so_pgid; - return (0); - } error = (*fp->f_ops->fo_ioctl) - (fp, TIOCGPGRP, (caddr_t)p->p_retval, p); - p->p_retval[0] = - p->p_retval[0]; + (fp, FIOGETOWN, (caddr_t)p->p_retval, p); return (error); case F_SETOWN: - if (fp->f_type == DTYPE_SOCKET) { - ((struct socket *)fp->f_data)->so_pgid = uap->arg; - return (0); - } - if (uap->arg <= 0) { - uap->arg = 
-uap->arg; - } else { - struct proc *p1 = pfind(uap->arg); - if (p1 == 0) - return (ESRCH); - uap->arg = p1->p_pgrp->pg_id; - } return ((*fp->f_ops->fo_ioctl) - (fp, TIOCSPGRP, (caddr_t)&uap->arg, p)); + (fp, FIOSETOWN, (caddr_t)&uap->arg, p)); case F_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ case F_SETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); if (error) return (error); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; switch (fl.l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg)); case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg)); case F_UNLCK: return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, F_POSIX)); default: return (EINVAL); } case F_GETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); if (error) return (error); if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK && fl.l_type != F_UNLCK) return (EINVAL); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX))) return (error); return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg, sizeof(fl))); default: return (EINVAL); } /* NOTREACHED */ } /* * Common code for dup, dup2, and fcntl(F_DUPFD). */ static int finishdup(fdp, old, new, retval) register struct filedesc *fdp; register int old, new; register_t *retval; { register struct file *fp; fp = fdp->fd_ofiles[old]; fdp->fd_ofiles[new] = fp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; fp->f_count++; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; return (0); +} + +/* + * If sigio is on the list associated with a process or process group, + * disable signalling from the device, remove sigio from the list and + * free sigio. + */ +void +funsetown(sigio) + struct sigio *sigio; +{ + int s; + + if (sigio == NULL) + return; + s = splhigh(); + *(sigio->sio_myref) = NULL; + splx(s); + if (sigio->sio_pgid < 0) { + SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, + sigio, sio_pgsigio); + } else /* if ((*sigiop)->sio_pgid > 0) */ { + SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, + sigio, sio_pgsigio); + } + crfree(sigio->sio_ucred); + FREE(sigio, M_SIGIO); +} + +/* Free a list of sigio structures. */ +void +funsetownlst(sigiolst) + struct sigiolst *sigiolst; +{ + struct sigio *sigio; + + while ((sigio = sigiolst->slh_first) != NULL) + funsetown(sigio); +} + +/* + * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). + * + * After permission checking, add a sigio structure to the sigio list for + * the process or process group. + */ +int +fsetown(pgid, sigiop) + pid_t pgid; + struct sigio **sigiop; +{ + struct proc *proc = NULL; + struct pgrp *pgrp = NULL; + struct sigio *sigio; + int s; + + if (pgid == 0) { + funsetown(*sigiop); + return (0); + } else if (pgid > 0) { + proc = pfind(pgid); + if (proc == NULL) + return (ESRCH); + /* + * Policy - Don't allow a process to FSETOWN a process + * in another session. + * + * Remove this test to allow maximum flexibility or + * restrict FSETOWN to the current process or process + * group for maximum safety. 
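+ *
+ * [Illustrative sketch, not part of the committed source: the sign
+ * convention callers of F_SETOWN/FIOSETOWN use, as fsetown() decodes
+ * it below; "fd" and "pgid" are hypothetical userland variables.]
+ */
+#if 0	/* illustrative userland sketch */
+	fcntl(fd, F_SETOWN, (int)getpid());	/* deliver SIGIO to this process */
+	fcntl(fd, F_SETOWN, -pgid);		/* deliver SIGIO to group pgid */
+	fcntl(fd, F_SETOWN, 0);			/* disown: funsetown() is called */
+#endif
+/*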
+ */ + else if (proc->p_session != curproc->p_session) + return (EPERM); + } else /* if (pgid < 0) */ { + pgrp = pgfind(-pgid); + if (pgrp == NULL) + return (ESRCH); + /* + * Policy - Don't allow a process to FSETOWN a process + * in another session. + * + * Remove this test to allow maximum flexibility or + * restrict FSETOWN to the current process or process + * group for maximum safety. + */ + else if (pgrp->pg_session != curproc->p_session) + return (EPERM); + } + funsetown(*sigiop); + MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, + M_WAITOK); + if (pgid > 0) { + SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); + sigio->sio_proc = proc; + } else { + SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); + sigio->sio_pgrp = pgrp; + } + sigio->sio_pgid = pgid; + crhold(curproc->p_ucred); + sigio->sio_ucred = curproc->p_ucred; + /* It would be convenient if p_ruid was in ucred. */ + sigio->sio_ruid = curproc->p_cred->p_ruid; + sigio->sio_myref = sigiop; + s = splhigh(); + *sigiop = sigio; + splx(s); + return (0); +} + +/* + * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). + */ +pid_t +fgetown(sigio) + struct sigio *sigio; +{ + return (sigio != NULL ? sigio->sio_pgid : 0); } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int close(p, uap) struct proc *p; struct close_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register int fd = uap->fd; register u_char *pf; if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); pf = (u_char *)&fdp->fd_ofileflags[fd]; if (*pf & UF_MAPPED) (void) munmapfd(p, fd); fdp->fd_ofiles[fd] = NULL; while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; *pf = 0; return (closef(fp, p)); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(p, uap) struct proc *p; register struct ofstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct ostat oub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_FIFO: case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; default: panic("ofstat"); /*NOTREACHED*/ } cvtstat(&ub, &oub); if (error == 0) error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)); return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Return status information about a file descriptor. 
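 *
 * [Illustrative sketch, not part of the committed source: the userland
 * face of the dispatch below, which routes DTYPE_VNODE/DTYPE_FIFO to
 * vn_stat(), DTYPE_SOCKET to soo_stat() and DTYPE_PIPE to pipe_stat().]
 */
#if 0	/* illustrative userland sketch */
#include <sys/types.h>
#include <sys/stat.h>

static int
is_regular_file(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0)
		return (-1);
	return (S_ISREG(sb.st_mode) ? 1 : 0);
}
#endif
/*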
*/ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int fstat(p, uap) struct proc *p; register struct fstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_FIFO: case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; default: panic("fstat"); /*NOTREACHED*/ } if (error == 0) error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)); return (error); } /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct nfstat_args { int fd; struct nstat *sb; }; #endif /* ARGSUSED */ int nfstat(p, uap) struct proc *p; register struct nfstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct nstat nub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_FIFO: case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; default: panic("fstat"); /*NOTREACHED*/ } if (error == 0) { cvtnstat(&ub, &nub); error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub)); } return (error); } /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int fpathconf(p, uap) struct proc *p; register struct fpathconf_args *uap; { struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) return (EINVAL); p->p_retval[0] = PIPE_BUF; return (0); case DTYPE_FIFO: case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; return (VOP_PATHCONF(vp, uap->name, p->p_retval)); default: panic("fpathconf"); } /*NOTREACHED*/ } /* * Allocate a file descriptor for the process. */ static int fdexpand; SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, ""); int fdalloc(p, want, result) struct proc *p; int want; int *result; { register struct filedesc *fdp = p->p_fd; register int i; int lim, last, nfiles; struct file **newofile; char *newofileflags; /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider * expanding the ofile array. */ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); for (;;) { last = min(fdp->fd_nfiles, lim); if ((i = want) < fdp->fd_freefile) i = fdp->fd_freefile; for (; i < last; i++) { if (fdp->fd_ofiles[i] == NULL) { fdp->fd_ofileflags[i] = 0; if (i > fdp->fd_lastfile) fdp->fd_lastfile = i; if (want <= fdp->fd_freefile) fdp->fd_freefile = i; *result = i; return (0); } } /* * No space in current array. Expand? 
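 *
 * [Illustrative sketch, not part of the committed source: the growth
 * rule used below.  The table starts at NDEXTENT slots and doubles on
 * each later expansion, so repeated fdalloc() calls cost amortized
 * constant copying per descriptor:]
 */
#if 0	/* illustrative restatement of the sizing rule */
static int
next_table_size(int nfiles)
{
	return (nfiles < NDEXTENT ? NDEXTENT : 2 * nfiles);
}
#endif
/*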
*/ if (fdp->fd_nfiles >= lim) return (EMFILE); if (fdp->fd_nfiles < NDEXTENT) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays * and zero the new portion of each array. */ bcopy(fdp->fd_ofiles, newofile, (i = sizeof(struct file *) * fdp->fd_nfiles)); bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); bcopy(fdp->fd_ofileflags, newofileflags, (i = sizeof(char) * fdp->fd_nfiles)); bzero(newofileflags + i, nfiles * sizeof(char) - i); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); fdp->fd_ofiles = newofile; fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; fdexpand++; } return (0); } /* * Check to see whether n user file descriptors * are available to the process p. */ int fdavail(p, n) struct proc *p; register int n; { register struct filedesc *fdp = p->p_fd; register struct file **fpp; register int i, lim, last; lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) return (1); last = min(fdp->fd_nfiles, lim); fpp = &fdp->fd_ofiles[fdp->fd_freefile]; for (i = last - fdp->fd_freefile; --i >= 0; fpp++) if (*fpp == NULL && --n <= 0) return (1); return (0); } /* * Create a new open file structure and allocate * a file decriptor for the process that refers to it. */ int falloc(p, resultfp, resultfd) register struct proc *p; struct file **resultfp; int *resultfd; { register struct file *fp, *fq; int error, i; if ((error = fdalloc(p, 0, &i))) return (error); if (nfiles >= maxfiles) { tablefull("file"); return (ENFILE); } /* * Allocate a new file descriptor. * If the process has file descriptor zero open, add to the list * of open files at that point, otherwise put it at the front of * the list of open files. */ nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); bzero(fp, sizeof(struct file)); if ((fq = p->p_fd->fd_ofiles[0])) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } p->p_fd->fd_ofiles[i] = fp; fp->f_count = 1; fp->f_cred = p->p_ucred; fp->f_seqcount = 1; crhold(fp->f_cred); if (resultfp) *resultfp = fp; if (resultfd) *resultfd = i; return (0); } /* * Free a file descriptor. */ void ffree(fp) register struct file *fp; { LIST_REMOVE(fp, f_list); crfree(fp->f_cred); #ifdef DIAGNOSTIC fp->f_count = 0; #endif nfiles--; FREE(fp, M_FILE); } /* * Build a new filedesc structure. */ struct filedesc * fdinit(p) struct proc *p; { register struct filedesc0 *newfdp; register struct filedesc *fdp = p->p_fd; MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bzero(newfdp, sizeof(struct filedesc0)); newfdp->fd_fd.fd_cdir = fdp->fd_cdir; VREF(newfdp->fd_fd.fd_cdir); newfdp->fd_fd.fd_rdir = fdp->fd_rdir; VREF(newfdp->fd_fd.fd_rdir); /* Create the file descriptor table. */ newfdp->fd_fd.fd_refcnt = 1; newfdp->fd_fd.fd_cmask = cmask; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_freefile = 0; newfdp->fd_fd.fd_lastfile = 0; return (&newfdp->fd_fd); } /* * Share a filedesc structure. */ struct filedesc * fdshare(p) struct proc *p; { p->p_fd->fd_refcnt++; return (p->p_fd); } /* * Copy a filedesc structure. 
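 *
 * [Illustrative sketch, not part of the committed source: fdcopy() is
 * what gives fork(2) its descriptor semantics.  The table is copied,
 * but each slot still points at the same struct file (f_count is
 * bumped), so parent and child share one file offset:]
 */
#if 0	/* illustrative userland sketch; assumes the child runs first */
#include <unistd.h>

void
shared_offset_demo(int fd)
{
	char c;

	if (fork() == 0) {
		read(fd, &c, 1);	/* child consumes byte 0 */
		_exit(0);
	}
	sleep(1);			/* crude: let the child go first */
	read(fd, &c, 1);		/* parent now sees byte 1 */
}
#endif
/*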
*/ struct filedesc * fdcopy(p) struct proc *p; { register struct filedesc *newfdp, *fdp = p->p_fd; register struct file **fpp; register int i; /* * Certain daemons might not have file descriptors */ if (fdp == NULL) return NULL; MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bcopy(fdp, newfdp, sizeof(struct filedesc)); VREF(newfdp->fd_cdir); VREF(newfdp->fd_rdir); newfdp->fd_refcnt = 1; /* * If the number of open files fits in the internal arrays * of the open file structure, use them, otherwise allocate * additional memory for the number of descriptors currently * in use. */ if (newfdp->fd_lastfile < NDFILE) { newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; newfdp->fd_ofileflags = ((struct filedesc0 *) newfdp)->fd_dfileflags; i = NDFILE; } else { /* * Compute the smallest multiple of NDEXTENT needed * for the file descriptors currently in use, * allowing the table to shrink. */ i = newfdp->fd_nfiles; while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE, M_FILEDESC, M_WAITOK); newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; } newfdp->fd_nfiles = i; bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp != NULL) (*fpp)->f_count++; return (newfdp); } /* * Release a filedesc structure. */ void fdfree(p) struct proc *p; { register struct filedesc *fdp = p->p_fd; struct file **fpp; register int i; /* * Certain daemons might not have file descriptors */ if (fdp == NULL) return; if (--fdp->fd_refcnt > 0) return; fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp) (void) closef(*fpp, p); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); vrele(fdp->fd_cdir); vrele(fdp->fd_rdir); FREE(fdp, M_FILEDESC); } /* * Close any files on exec? */ void fdcloseexec(p) struct proc *p; { struct filedesc *fdp = p->p_fd; struct file **fpp; char *fdfp; register int i; /* * Certain daemons might not have file descriptors */ if (fdp == NULL) return; fpp = fdp->fd_ofiles; fdfp = fdp->fd_ofileflags; for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) { if (*fdfp & UF_MAPPED) (void) munmapfd(p, i); (void) closef(*fpp, p); *fpp = NULL; *fdfp = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } /* * Internal form of close. * Decrement reference count on file structure. * Note: p may be NULL when closing a file * that was being passed in a message. */ int closef(fp, p) register struct file *fp; register struct proc *p; { struct vnode *vp; struct flock lf; int error; if (fp == NULL) return (0); /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. 
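 *
 * [Illustrative sketch, not part of the committed source: the classic
 * consequence of this rule, seen from userland.  Closing ANY
 * descriptor for a file releases every POSIX lock the process holds
 * on that file; the path is hypothetical.]
 */
#if 0	/* illustrative userland sketch */
	struct flock fl;
	int fd1, fd2;

	fd1 = open("/tmp/lockfile", O_RDWR);
	fd2 = open("/tmp/lockfile", O_RDWR);	/* same file, second fd */
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;
	fcntl(fd1, F_SETLK, &fl);	/* whole-file lock via fd1 */
	close(fd2);			/* drops the lock taken via fd1! */
#endif
/*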
*/ if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX); } if (--fp->f_count > 0) return (0); if (fp->f_count < 0) panic("closef: count < 0"); if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); } if (fp->f_ops) error = (*fp->f_ops->fo_close)(fp, p); else error = 0; ffree(fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). */ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int flock(p, uap) struct proc *p; register struct flock_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; struct flock lf; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EOPNOTSUPP); vp = (struct vnode *)fp->f_data; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; fp->f_flag &= ~FHASLOCK; return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK)); } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else return (EBADF); fp->f_flag |= FHASLOCK; if (uap->how & LOCK_NB) return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK)); return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT)); } /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. */ /* ARGSUSED */ static int fdopen(dev, mode, type, p) dev_t dev; int mode, type; struct proc *p; { /* * XXX Kludge: set curproc->p_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ p->p_dupfd = minor(dev); return (ENODEV); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(fdp, indx, dfd, mode, error) register struct filedesc *fdp; register int indx, dfd; int mode; int error; { register struct file *wfp; struct file *fp; /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, reject. Note, check for new == old is necessary as * falloc could allocate an already closed to-be-dup'd descriptor * as the new descriptor. */ fp = fdp->fd_ofiles[indx]; if ((u_int)dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp) return (EBADF); /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor * (indx) and return. * * For ENXIO steal away the file structure from (dfd) and * store it in (indx). (dfd) is effectively closed by * this operation. * * Any other error code is just returned. 
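 *
 * [Illustrative sketch, not part of the committed source: the effect
 * visible from userland.  fdopen() above returns ENODEV with p_dupfd
 * set, and vn_open()'s caller lands here, so the open becomes a dup:]
 */
#if 0	/* illustrative userland sketch */
	int fd = open("/dev/fd/0", O_RDONLY);	/* behaves like dup(0) */
#endif
/*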
*/ switch (error) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) return (EACCES); fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; wfp->f_count++; if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; return (0); case ENXIO: /* * Steal away the file pointer from dfd, and stuff it into indx. */ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; fdp->fd_ofiles[dfd] = NULL; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; /* * Complete the clean up of the filedesc structure by * recomputing the various hints. */ if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; else while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; return (0); default: return (error); } /* NOTREACHED */ } /* * Get file structures. */ static int sysctl_kern_file SYSCTL_HANDLER_ARGS { int error; struct file *fp; if (!req->oldptr) { /* * overestimate by 10 files */ return (SYSCTL_OUT(req, 0, sizeof(filehead) + (nfiles + 10) * sizeof(struct file))); } error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead)); if (error) return (error); /* * followed by an array of file structures */ for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) { error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file)); if (error) return (error); } return (0); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_kern_file, "S,file", ""); SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, &maxfilesperproc, 0, ""); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, ""); static fildesc_devsw_installed = 0; #ifdef DEVFS static void *devfs_token_stdin; static void *devfs_token_stdout; static void *devfs_token_stderr; static void *devfs_token_fildesc[NUMFDESC]; #endif static void fildesc_drvinit(void *unused) { dev_t dev; #ifdef DEVFS int fd; #endif if( ! fildesc_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&fildesc_cdevsw,NULL); fildesc_devsw_installed = 1; #ifdef DEVFS for (fd = 0; fd < NUMFDESC; fd++) devfs_token_fildesc[fd] = devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR, UID_BIN, GID_BIN, 0666, "fd/%d", fd); devfs_token_stdin = devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR, UID_ROOT, GID_WHEEL, 0666, "stdin"); devfs_token_stdout = devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR, UID_ROOT, GID_WHEEL, 0666, "stdout"); devfs_token_stderr = devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR, UID_ROOT, GID_WHEEL, 0666, "stderr"); #endif } } SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, fildesc_drvinit,NULL) Index: head/sys/kern/kern_exit.c =================================================================== --- head/sys/kern/kern_exit.c (revision 41085) +++ head/sys/kern/kern_exit.c (revision 41086) @@ -1,594 +1,600 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 - * $Id: kern_exit.c,v 1.67 1998/06/05 21:44:20 dg Exp $ + * $Id: kern_exit.c,v 1.68 1998/11/10 09:16:29 peter Exp $ */ #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for acct_process() function prototype */ #include #include #include #include #ifdef COMPAT_43 #include #include #endif #include /* for UCHAR_MAX = typeof(p_priority)_MAX */ #include #include #include #include #include #include static MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); static int wait1 __P((struct proc *, struct wait_args *, int)); /* * callout list for things to do at exit time */ typedef struct exit_list_element { struct exit_list_element *next; exitlist_fn function; } *ele_p; static ele_p exit_list; /* * exit -- * Death of process. */ void exit(p, uap) struct proc *p; struct rexit_args /* { int rval; } */ *uap; { exit1(p, W_EXITCODE(uap->rval, 0)); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state * to zombie, and unlink proc from allproc and parent's lists. Save exit * status and rusage for wait(). Check for child processes and orphan them. */ void exit1(p, rv) register struct proc *p; int rv; { register struct proc *q, *nq; register struct vmspace *vm; ele_p ep = exit_list; if (p->p_pid == 1) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } aio_proc_rundown(p); /* are we a task leader? 
*/ if(p == p->p_leader) { struct kill_args killArgs; killArgs.signum = SIGKILL; q = p->p_peers; while(q) { killArgs.pid = q->p_pid; /* * The interface for kill is better * than the internal signal */ kill(p, &killArgs); nq = q; q = q->p_peers; /* * orphan the threads so we don't mess up * when they call exit */ nq->p_peers = 0; nq->p_leader = nq; } /* otherwise are we a peer? */ } else if(p->p_peers) { q = p->p_leader; while(q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; } #ifdef PGINPROF vmsizmon(); #endif STOPEVENT(p, S_EXIT, rv); /* * Check if any LKMs need anything done at process exit. * e.g. SYSV IPC stuff * XXX what if one of these generates an error? */ while (ep) { (*ep->function)(p); ep = ep->next; } if (p->p_flag & P_PROFIL) stopprofclock(p); MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage), M_ZOMBIE, M_WAITOK); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ p->p_flag &= ~(P_TRACED | P_PPWAIT); p->p_flag |= P_WEXIT; p->p_sigignore = ~0; p->p_siglist = 0; if (timevalisset(&p->p_realtimer.it_value)) untimeout(realitexpire, (caddr_t)p, p->p_ithandle); + + /* + * Reset any sigio structures pointing to us as a result of + * F_SETOWN with our pid. + */ + funsetownlst(&p->p_sigiolst); /* * Close open files and release open-file table. * This may block! */ fdfree(p); /* * XXX Shutdown SYSV semaphores */ semexit(p); /* The next two chunks should probably be moved to vmspace_exit. */ vm = p->p_vmspace; /* * Release user portion of address space. * This releases references to vnodes, * which could cause I/O if the file has been unlinked. * Need to do this early enough that we can still sleep. * Can't free the entire vmspace as the kernel stack * may be mapped within that space also. */ if (vm->vm_refcnt == 1) { if (vm->vm_shm) shmexit(p); pmap_remove_pages(&vm->vm_pmap, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); } if (SESS_LEADER(p)) { register struct session *sp = p->p_session; if (sp->s_ttyvp) { /* * Controlling process. * Signal foreground pgrp, * drain controlling terminal * and revoke access to controlling terminal. */ if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) { if (sp->s_ttyp->t_pgrp) pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); (void) ttywait(sp->s_ttyp); /* * The tty could have been revoked * if we blocked. */ if (sp->s_ttyvp) VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } if (sp->s_ttyvp) vrele(sp->s_ttyvp); sp->s_ttyvp = NULL; /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. * (for logging and informational purposes) */ } sp->s_leader = NULL; } fixjobc(p, p->p_pgrp, 0); (void)acct_process(p); #ifdef KTRACE /* * release trace file */ p->p_traceflag = 0; /* don't trace the vrele() */ if (p->p_tracep) vrele(p->p_tracep); #endif /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); p->p_stat = SZOMB; LIST_REMOVE(p, p_hash); q = p->p_children.lh_first; if (q) /* only need this if any child is S_ZOMB */ wakeup((caddr_t) initproc); for (; q != 0; q = nq) { nq = q->p_sibling.le_next; LIST_REMOVE(q, p_sibling); LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling); q->p_pptr = initproc; /* * Traced processes are killed * since their existence means someone is screwing up. 
*/ if (q->p_flag & P_TRACED) { q->p_flag &= ~P_TRACED; psignal(q, SIGKILL); } } /* * Save exit status and final rusage info, adding in child rusage * info and self times. */ p->p_xstat = rv; *p->p_ru = p->p_stats->p_ru; calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL); ruadd(p->p_ru, &p->p_stats->p_cru); /* * Notify parent that we're gone. If parent has the P_NOCLDWAIT * flag set, notify process 1 instead (and hope it will handle * this situation). */ if (p->p_pptr->p_flag & P_NOCLDWAIT) { struct proc *pp = p->p_pptr; proc_reparent(p, initproc); /* * If this was the last child of our parent, notify * parent, so in case he was wait(2)ing, he will * continue. */ if (LIST_EMPTY(&pp->p_children)) wakeup((caddr_t)pp); } psignal(p->p_pptr, SIGCHLD); wakeup((caddr_t)p->p_pptr); #if defined(tahoe) /* move this to cpu_exit */ p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL; #endif /* * Clear curproc after we've done all operations * that could block, and before tearing down the rest * of the process state that might be used from clock, etc. * Also, can't clear curproc while we're still runnable, * as we're not on a run queue (we are current, just not * a proper proc any longer!). * * Other substructures are freed from wait(). */ curproc = NULL; if (--p->p_limit->p_refcnt == 0) { FREE(p->p_limit, M_SUBPROC); p->p_limit = NULL; } /* * Finally, call machine-dependent code to release the remaining * resources including address space, the kernel stack and pcb. * The address space is released by "vmspace_free(p->p_vmspace)"; * This is machine-dependent, as we may have to change stacks * or ensure that the current one isn't reallocated before we * finish. cpu_exit will end with a call to cpu_switch(), finishing * our execution (pun intended). */ cpu_exit(p); } #ifdef COMPAT_43 #if defined(hp300) || defined(luna68k) #include #define GETPS(rp) ((struct frame *)(rp))->f_sr #else #define GETPS(rp) (rp)[PS] #endif int owait(p, uap) struct proc *p; register struct owait_args /* { int dummy; } */ *uap; { struct wait_args w; #ifdef PSL_ALLCC if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) != PSL_ALLCC) { w.options = 0; w.rusage = NULL; } else { w.options = p->p_md.md_regs[R0]; w.rusage = (struct rusage *)p->p_md.md_regs[R1]; } #else w.options = 0; w.rusage = NULL; #endif w.pid = WAIT_ANY; w.status = NULL; return (wait1(p, &w, 1)); } #endif /* COMPAT_43 */ int wait4(p, uap) struct proc *p; struct wait_args *uap; { return (wait1(p, uap, 0)); } static int wait1(q, uap, compat) register struct proc *q; register struct wait_args /* { int pid; int *status; int options; struct rusage *rusage; } */ *uap; int compat; { register int nfound; register struct proc *p, *t; int status, error; if (uap->pid == 0) uap->pid = -q->p_pgid; if (uap->options &~ (WUNTRACED|WNOHANG)) return (EINVAL); loop: nfound = 0; for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) { if (uap->pid != WAIT_ANY && p->p_pid != uap->pid && p->p_pgid != -uap->pid) continue; nfound++; if (p->p_stat == SZOMB) { /* charge childs scheduling cpu usage to parent */ if (curproc->p_pid != 1) { curproc->p_estcpu = min(curproc->p_estcpu + p->p_estcpu, UCHAR_MAX); } q->p_retval[0] = p->p_pid; #ifdef COMPAT_43 if (compat) q->p_retval[1] = p->p_xstat; else #endif if (uap->status) { status = p->p_xstat; /* convert to int */ if ((error = copyout((caddr_t)&status, (caddr_t)uap->status, sizeof(status)))) return (error); } if (uap->rusage && (error = copyout((caddr_t)p->p_ru, (caddr_t)uap->rusage, sizeof (struct rusage)))) return (error); /* * If we got the 
child via a ptrace 'attach', * we need to give it back to the old parent. */ if (p->p_oppid && (t = pfind(p->p_oppid))) { p->p_oppid = 0; proc_reparent(p, t); psignal(t, SIGCHLD); wakeup((caddr_t)t); return (0); } p->p_xstat = 0; ruadd(&q->p_stats->p_cru, p->p_ru); FREE(p->p_ru, M_ZOMBIE); p->p_ru = NULL; /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_cred->p_ruid, -1); /* * Release reference to text vnode */ if (p->p_textvp) vrele(p->p_textvp); /* * Free up credentials. */ if (--p->p_cred->p_refcnt == 0) { crfree(p->p_cred->pc_ucred); FREE(p->p_cred, M_SUBPROC); p->p_cred = NULL; } /* * Finally finished with old proc entry. * Unlink it from its process group and free it. */ leavepgrp(p); LIST_REMOVE(p, p_list); /* off zombproc */ LIST_REMOVE(p, p_sibling); /* * Give machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. */ cpu_wait(p); zfree(proc_zone, p); nprocs--; return (0); } if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & WUNTRACED)) { p->p_flag |= P_WAITED; q->p_retval[0] = p->p_pid; #ifdef COMPAT_43 if (compat) { q->p_retval[1] = W_STOPCODE(p->p_xstat); error = 0; } else #endif if (uap->status) { status = W_STOPCODE(p->p_xstat); error = copyout((caddr_t)&status, (caddr_t)uap->status, sizeof(status)); } else error = 0; return (error); } } if (nfound == 0) return (ECHILD); if (uap->options & WNOHANG) { q->p_retval[0] = 0; return (0); } if ((error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))) return (error); goto loop; } /* * make process 'parent' the new parent of process 'child'. */ void proc_reparent(child, parent) register struct proc *child; register struct proc *parent; { if (child->p_pptr == parent) return; LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); child->p_pptr = parent; } /* * The next two functions are to handle adding/deleting items on the * exit callout list * * at_exit(): * Take the arguments given and put them onto the exit callout list, * However first make sure that it's not already there. * returns 0 on success. */ int at_exit(function) exitlist_fn function; { ele_p ep; /* Be noisy if the programmer has lost track of things */ if (rm_at_exit(function)) printf("exit callout entry already present\n"); ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->next = exit_list; ep->function = function; exit_list = ep; return (0); } /* * Scan the exit callout list for the given items and remove them. * Returns the number of items removed. * Logically this can only be 0 or 1. */ int rm_at_exit(function) exitlist_fn function; { ele_p *epp, ep; int count; count = 0; epp = &exit_list; ep = *epp; while (ep) { if (ep->function == function) { *epp = ep->next; free(ep, M_TEMP); count++; } else { epp = &ep->next; } ep = *epp; } return (count); } Index: head/sys/kern/kern_proc.c =================================================================== --- head/sys/kern/kern_proc.c (revision 41085) +++ head/sys/kern/kern_proc.c (revision 41086) @@ -1,600 +1,608 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 - * $Id: kern_proc.c,v 1.37 1998/07/11 07:45:40 bde Exp $ + * $Id: kern_proc.c,v 1.38 1998/11/09 15:07:41 truckman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include static MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); struct prochd qs[NQS]; /* as good a place as any... */ struct prochd rtqs[NQS]; /* Space for REALTIME queues too */ struct prochd idqs[NQS]; /* Space for IDLE queues too */ static void pgdelete __P((struct pgrp *)); /* * Structure associated with user cacheing. */ struct uidinfo { LIST_ENTRY(uidinfo) ui_hash; uid_t ui_uid; long ui_proccnt; }; #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void orphanpg __P((struct pgrp *pg)); /* * Other process lists */ struct pidhashhead *pidhashtbl; u_long pidhash; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; vm_zone_t proc_zone; /* * Initialize global process hashing structures. */ void procinit() { LIST_INIT(&allproc); LIST_INIT(&zombproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash); proc_zone = zinit("PROC", sizeof (struct proc), 0, 0, 5); } /* * Change the count associated with number of processes * a given user is using. 
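 *
 * [Illustrative sketch, not part of the committed source: a condensed,
 * hypothetical version of how fork1() consumes the return value to
 * enforce RLIMIT_NPROC for non-root users.]
 */
#if 0	/* illustrative sketch of the accounting pattern */
	uid = p1->p_cred->p_ruid;
	if (uid != 0 &&
	    chgproccnt(uid, 1) > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		chgproccnt(uid, -1);	/* back the increment out */
		return (EAGAIN);
	}
#endif
/*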
*/ int chgproccnt(uid, diff) uid_t uid; int diff; { register struct uidinfo *uip; register struct uihashhead *uipp; uipp = UIHASH(uid); for (uip = uipp->lh_first; uip != 0; uip = uip->ui_hash.le_next) if (uip->ui_uid == uid) break; if (uip) { uip->ui_proccnt += diff; if (uip->ui_proccnt > 0) return (uip->ui_proccnt); if (uip->ui_proccnt < 0) panic("chgproccnt: procs < 0"); LIST_REMOVE(uip, ui_hash); FREE(uip, M_PROC); return (0); } if (diff <= 0) { if (diff == 0) return(0); panic("chgproccnt: lost user"); } MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); LIST_INSERT_HEAD(uipp, uip, ui_hash); uip->ui_uid = uid; uip->ui_proccnt = diff; return (diff); } /* * Is p an inferior of the current process? */ int inferior(p) register struct proc *p; { for (; p != curproc; p = p->p_pptr) if (p->p_pid == 0) return (0); return (1); } /* * Locate a process by number */ struct proc * pfind(pid) register pid_t pid; { register struct proc *p; for (p = PIDHASH(pid)->lh_first; p != 0; p = p->p_hash.le_next) if (p->p_pid == pid) return (p); return (NULL); } /* * Locate a process group by number */ struct pgrp * pgfind(pgid) register pid_t pgid; { register struct pgrp *pgrp; for (pgrp = PGRPHASH(pgid)->lh_first; pgrp != 0; pgrp = pgrp->pg_hash.le_next) if (pgrp->pg_id == pgid) return (pgrp); return (NULL); } /* * Move p to a new or existing process group (and session) */ int enterpgrp(p, pgid, mksess) register struct proc *p; pid_t pgid; int mksess; { register struct pgrp *pgrp = pgfind(pgid); #ifdef DIAGNOSTIC if (pgrp != NULL && mksess) /* firewalls */ panic("enterpgrp: setsid into non-empty pgrp"); if (SESS_LEADER(p)) panic("enterpgrp: session leader attempted setpgrp"); #endif if (pgrp == NULL) { pid_t savepid = p->p_pid; struct proc *np; /* * new process group */ #ifdef DIAGNOSTIC if (p->p_pid != pgid) panic("enterpgrp: new pgrp and pid != pgid"); #endif MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, M_WAITOK); if ((np = pfind(savepid)) == NULL || np != p) return (ESRCH); if (mksess) { register struct session *sess; /* * new session */ MALLOC(sess, struct session *, sizeof(struct session), M_SESSION, M_WAITOK); sess->s_leader = p; sess->s_sid = p->p_pid; sess->s_count = 1; sess->s_ttyvp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); p->p_flag &= ~P_CONTROLT; pgrp->pg_session = sess; #ifdef DIAGNOSTIC if (p != curproc) panic("enterpgrp: mksession and p != curproc"); #endif } else { pgrp->pg_session = p->p_session; pgrp->pg_session->s_count++; } pgrp->pg_id = pgid; LIST_INIT(&pgrp->pg_members); LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); pgrp->pg_jobc = 0; + SLIST_INIT(&pgrp->pg_sigiolst); } else if (pgrp == p->p_pgrp) return (0); /* * Adjust eligibility of affected pgrps to participate in job control. * Increment eligibility counts before decrementing, otherwise we * could reach 0 spuriously during the first call. */ fixjobc(p, pgrp, 1); fixjobc(p, p->p_pgrp, 0); LIST_REMOVE(p, p_pglist); if (p->p_pgrp->pg_members.lh_first == 0) pgdelete(p->p_pgrp); p->p_pgrp = pgrp; LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); return (0); } /* * remove process from process group */ int leavepgrp(p) register struct proc *p; { LIST_REMOVE(p, p_pglist); if (p->p_pgrp->pg_members.lh_first == 0) pgdelete(p->p_pgrp); p->p_pgrp = 0; return (0); } /* * delete a process group */ static void pgdelete(pgrp) register struct pgrp *pgrp; { + + /* + * Reset any sigio structures pointing to us as a result of + * F_SETOWN with our pgid. 
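+ *
+ * [Illustrative sketch, not part of the committed source: the
+ * driver-side pattern the new sigio entry points support.  A driver
+ * keeps one struct sigio pointer (the softc field "sc_sigio" here is
+ * hypothetical) and delegates the FIO*OWN ioctls to this file:]
+ */
+#if 0	/* illustrative ioctl fragment */
+	case FIOSETOWN:
+		return (fsetown(*(int *)data, &sc->sc_sigio));
+	case FIOGETOWN:
+		*(int *)data = fgetown(sc->sc_sigio);
+		return (0);
+	/* ...and funsetown(sc->sc_sigio) on last close. */
+#endif
+/*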
+ */ + funsetownlst(&pgrp->pg_sigiolst); if (pgrp->pg_session->s_ttyp != NULL && pgrp->pg_session->s_ttyp->t_pgrp == pgrp) pgrp->pg_session->s_ttyp->t_pgrp = NULL; LIST_REMOVE(pgrp, pg_hash); if (--pgrp->pg_session->s_count == 0) FREE(pgrp->pg_session, M_SESSION); FREE(pgrp, M_PGRP); } /* * Adjust pgrp jobc counters when specified process changes process group. * We count the number of processes in each process group that "qualify" * the group for terminal job control (those with a parent in a different * process group of the same session). If that count reaches zero, the * process group becomes orphaned. Check both the specified process' * process group and that of its children. * entering == 0 => p is leaving specified group. * entering == 1 => p is entering specified group. */ void fixjobc(p, pgrp, entering) register struct proc *p; register struct pgrp *pgrp; int entering; { register struct pgrp *hispgrp; register struct session *mysession = pgrp->pg_session; /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && hispgrp->pg_session == mysession) if (entering) pgrp->pg_jobc++; else if (--pgrp->pg_jobc == 0) orphanpg(pgrp); /* * Check this process' children to see whether they qualify * their process groups; if so, adjust counts for children's * process groups. */ for (p = p->p_children.lh_first; p != 0; p = p->p_sibling.le_next) if ((hispgrp = p->p_pgrp) != pgrp && hispgrp->pg_session == mysession && p->p_stat != SZOMB) if (entering) hispgrp->pg_jobc++; else if (--hispgrp->pg_jobc == 0) orphanpg(hispgrp); } /* * A process group has become orphaned; * if there are any stopped processes in the group, * hang-up all process in that group. */ static void orphanpg(pg) struct pgrp *pg; { register struct proc *p; for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { if (p->p_stat == SSTOP) { for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { psignal(p, SIGHUP); psignal(p, SIGCONT); } return; } } } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(pgrpdump, pgrpdump) { register struct pgrp *pgrp; register struct proc *p; register int i; for (i = 0; i <= pgrphash; i++) { if (pgrp = pgrphashtbl[i].lh_first) { printf("\tindx %d\n", i); for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) { printf( "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n", (void *)pgrp, (long)pgrp->pg_id, (void *)pgrp->pg_session, pgrp->pg_session->s_count, (void *)pgrp->pg_members.lh_first); for (p = pgrp->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { printf("\t\tpid %ld addr %p pgrp %p\n", (long)p->p_pid, (void *)p, (void *)p->p_pgrp); } } } } } #endif /* DDB */ /* * Fill in an eproc structure for the specified process. 
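 *
 * [Illustrative sketch, not part of the committed source: eproc is the
 * userland-visible half of kinfo_proc, so the fields filled in below
 * surface through the kern.proc sysctl.  Assumes the <sys/user.h>
 * kinfo_proc layout of this era.]
 */
#if 0	/* illustrative userland sketch */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/user.h>

static pid_t
pgid_of(pid_t pid)
{
	struct kinfo_proc kp;
	size_t len;
	int mib[4];

	mib[0] = CTL_KERN;
	mib[1] = KERN_PROC;
	mib[2] = KERN_PROC_PID;
	mib[3] = (int)pid;
	len = sizeof(kp);
	if (sysctl(mib, 4, &kp, &len, NULL, 0) < 0)
		return (-1);
	return (kp.kp_eproc.e_pgid);
}
#endif
/*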
*/ void fill_eproc(p, ep) register struct proc *p; register struct eproc *ep; { register struct tty *tp; bzero(ep, sizeof(*ep)); ep->e_paddr = p; if (p->p_cred) { ep->e_pcred = *p->p_cred; if (p->p_ucred) ep->e_ucred = *p->p_ucred; } if (p->p_stat != SIDL && p->p_stat != SZOMB && p->p_vmspace != NULL) { register struct vmspace *vm = p->p_vmspace; #ifdef pmap_resident_count ep->e_vm.vm_rssize = pmap_resident_count(&vm->vm_pmap); /*XXX*/ #else ep->e_vm.vm_rssize = vm->vm_rssize; #endif ep->e_vm.vm_tsize = vm->vm_tsize; ep->e_vm.vm_dsize = vm->vm_dsize; ep->e_vm.vm_ssize = vm->vm_ssize; ep->e_vm.vm_taddr = vm->vm_taddr; ep->e_vm.vm_daddr = vm->vm_daddr; ep->e_vm.vm_minsaddr = vm->vm_minsaddr; ep->e_vm.vm_maxsaddr = vm->vm_maxsaddr; ep->e_vm.vm_map = vm->vm_map; #ifndef sparc ep->e_vm.vm_pmap = vm->vm_pmap; #endif } if (p->p_pptr) ep->e_ppid = p->p_pptr->p_pid; if (p->p_pgrp) { ep->e_pgid = p->p_pgrp->pg_id; ep->e_jobc = p->p_pgrp->pg_jobc; ep->e_sess = p->p_pgrp->pg_session; if (ep->e_sess) { bcopy(ep->e_sess->s_login, ep->e_login, sizeof(ep->e_login)); if (ep->e_sess->s_ttyvp) ep->e_flag = EPROC_CTTY; if (p->p_session && SESS_LEADER(p)) ep->e_flag |= EPROC_SLEADER; } } if ((p->p_flag & P_CONTROLT) && (ep->e_sess != NULL) && ((tp = ep->e_sess->s_ttyp) != NULL)) { ep->e_tdev = tp->t_dev; ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; ep->e_tsess = tp->t_session; } else ep->e_tdev = NODEV; if (p->p_wmesg) { strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); ep->e_wmesg[WMESGLEN] = 0; } } static struct proc * zpfind(pid_t pid) { struct proc *p; for (p = zombproc.lh_first; p != 0; p = p->p_list.le_next) if (p->p_pid == pid) return (p); return (NULL); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int doingzomb) { struct eproc eproc; int error; pid_t pid = p->p_pid; fill_eproc(p, &eproc); error = SYSCTL_OUT(req,(caddr_t)p, sizeof(struct proc)); if (error) return (error); error = SYSCTL_OUT(req,(caddr_t)&eproc, sizeof(eproc)); if (error) return (error); if (!doingzomb && pid && (pfind(pid) != p)) return EAGAIN; if (doingzomb && zpfind(pid) != p) return EAGAIN; return (0); } static int sysctl_kern_proc SYSCTL_HANDLER_ARGS { int *name = (int*) arg1; u_int namelen = arg2; struct proc *p; int doingzomb; int error = 0; if (oidp->oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); p = pfind((pid_t)name[0]); if (!p) return (0); error = sysctl_out_proc(p, req, 0); return (error); } if (oidp->oid_number == KERN_PROC_ALL && !namelen) ; else if (oidp->oid_number != KERN_PROC_ALL && namelen == 1) ; else return (EINVAL); if (!req->oldptr) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) { if (!doingzomb) p = allproc.lh_first; else p = zombproc.lh_first; for (; p != 0; p = p->p_list.le_next) { /* * Skip embryonic processes. */ if (p->p_stat == SIDL) continue; /* * TODO - make more efficient (see notes below). * do by session. 
*/ switch (oidp->oid_number) { case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) continue; break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL || p->p_session->s_ttyp == NULL || p->p_session->s_ttyp->t_dev != (dev_t)name[0]) continue; break; case KERN_PROC_UID: if (p->p_ucred == NULL || p->p_ucred->cr_uid != (uid_t)name[0]) continue; break; case KERN_PROC_RUID: if (p->p_ucred == NULL || p->p_cred->p_ruid != (uid_t)name[0]) continue; break; } error = sysctl_out_proc(p, req, doingzomb); if (error) return (error); } } return (0); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT, 0, 0, sysctl_kern_proc, "S,proc", ""); SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); Index: head/sys/kern/kern_sig.c =================================================================== --- head/sys/kern/kern_sig.c (revision 41085) +++ head/sys/kern/kern_sig.c (revision 41086) @@ -1,1346 +1,1383 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 - * $Id: kern_sig.c,v 1.47 1998/09/14 23:25:18 jdp Exp $ + * $Id: kern_sig.c,v 1.48 1998/10/21 16:31:38 jdp Exp $ */ #include "opt_compat.h" #include "opt_ktrace.h" #define SIGPROP /* include signal properties table */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif static int killpg1 __P((struct proc *cp, int signum, int pgid, int all)); static void setsigvec __P((struct proc *p, int signum, struct sigaction *sa)); static void stop __P((struct proc *)); static int kern_logsigexit = 1; SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, &kern_logsigexit, 0, ""); /* * Can process p, with pcred pc, send the signal signum to process q? */ #define CANSIGNAL(p, pc, q, signum) \ ((pc)->pc_ucred->cr_uid == 0 || \ (pc)->p_ruid == (q)->p_cred->p_ruid || \ (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \ (pc)->p_ruid == (q)->p_ucred->cr_uid || \ (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \ ((signum) == SIGCONT && (q)->p_session == (p)->p_session)) +/* + * Policy -- Can real uid ruid with ucred uc send a signal to process q? + */ +#define CANSIGIO(ruid, uc, q) \ + ((uc)->cr_uid == 0 || \ + (ruid) == (q)->p_cred->p_ruid || \ + (uc)->cr_uid == (q)->p_cred->p_ruid || \ + (ruid) == (q)->p_ucred->cr_uid || \ + (uc)->cr_uid == (q)->p_ucred->cr_uid) + int sugid_coredump; SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RW, &sugid_coredump, 0, ""); #ifndef _SYS_SYSPROTO_H_ struct sigaction_args { int signum; struct sigaction *nsa; struct sigaction *osa; }; #endif /* ARGSUSED */ int sigaction(p, uap) struct proc *p; register struct sigaction_args *uap; { struct sigaction vec; register struct sigaction *sa; register struct sigacts *ps = p->p_sigacts; register int signum; int bit, error; signum = uap->signum; if (signum <= 0 || signum >= NSIG) return (EINVAL); sa = &vec; if (uap->osa) { sa->sa_handler = ps->ps_sigact[signum]; sa->sa_mask = ps->ps_catchmask[signum]; bit = sigmask(signum); sa->sa_flags = 0; if ((ps->ps_sigonstack & bit) != 0) sa->sa_flags |= SA_ONSTACK; if ((ps->ps_sigintr & bit) == 0) sa->sa_flags |= SA_RESTART; if ((ps->ps_sigreset & bit) != 0) sa->sa_flags |= SA_RESETHAND; if ((ps->ps_signodefer & bit) != 0) sa->sa_flags |= SA_NODEFER; if (signum == SIGCHLD && p->p_flag & P_NOCLDSTOP) sa->sa_flags |= SA_NOCLDSTOP; if (signum == SIGCHLD && p->p_flag & P_NOCLDWAIT) sa->sa_flags |= SA_NOCLDWAIT; if ((error = copyout((caddr_t)sa, (caddr_t)uap->osa, sizeof (vec)))) return (error); } if (uap->nsa) { if ((error = copyin((caddr_t)uap->nsa, (caddr_t)sa, sizeof (vec)))) return (error); if ((signum == SIGKILL || signum == SIGSTOP) && sa->sa_handler != SIG_DFL) return (EINVAL); setsigvec(p, signum, sa); } return (0); } static void setsigvec(p, signum, sa) register struct proc *p; int signum; register struct sigaction *sa; { register struct sigacts *ps = p->p_sigacts; register int 
bit; bit = sigmask(signum); /* * Change setting atomically. */ (void) splhigh(); ps->ps_sigact[signum] = sa->sa_handler; ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask; if ((sa->sa_flags & SA_RESTART) == 0) ps->ps_sigintr |= bit; else ps->ps_sigintr &= ~bit; if (sa->sa_flags & SA_ONSTACK) ps->ps_sigonstack |= bit; else ps->ps_sigonstack &= ~bit; if (sa->sa_flags & SA_RESETHAND) ps->ps_sigreset |= bit; else ps->ps_sigreset &= ~bit; if (sa->sa_flags & SA_NODEFER) ps->ps_signodefer |= bit; else ps->ps_signodefer &= ~bit; #ifdef COMPAT_SUNOS if (sa->sa_flags & SA_USERTRAMP) ps->ps_usertramp |= bit; else ps->ps_usertramp &= ~bit; #endif if (signum == SIGCHLD) { if (sa->sa_flags & SA_NOCLDSTOP) p->p_flag |= P_NOCLDSTOP; else p->p_flag &= ~P_NOCLDSTOP; if (sa->sa_flags & SA_NOCLDWAIT) { /* * Paranoia: since SA_NOCLDWAIT is implemented by * reparenting the dying child to PID 1 (and * trust it to reap the zombie), PID 1 itself is * forbidden to set SA_NOCLDWAIT. */ if (p->p_pid == 1) p->p_flag &= ~P_NOCLDWAIT; else p->p_flag |= P_NOCLDWAIT; } else p->p_flag &= ~P_NOCLDWAIT; } /* * Set bit in p_sigignore for signals that are set to SIG_IGN, * and for signals set to SIG_DFL where the default is to ignore. * However, don't put SIGCONT in p_sigignore, * as we have to restart the process. */ if (sa->sa_handler == SIG_IGN || (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) { p->p_siglist &= ~bit; /* never to be seen again */ if (signum != SIGCONT) p->p_sigignore |= bit; /* easier in psignal */ p->p_sigcatch &= ~bit; } else { p->p_sigignore &= ~bit; if (sa->sa_handler == SIG_DFL) p->p_sigcatch &= ~bit; else p->p_sigcatch |= bit; } (void) spl0(); } /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. */ void siginit(p) struct proc *p; { register int i; for (i = 0; i < NSIG; i++) if (sigprop[i] & SA_IGNORE && i != SIGCONT) p->p_sigignore |= sigmask(i); } /* * Reset signals for an exec of the specified process. */ void execsigs(p) register struct proc *p; { register struct sigacts *ps = p->p_sigacts; register int nc, mask; /* * Reset caught signals. Held signals remain held * through p_sigmask (unless they were caught, * and are now ignored by default). */ while (p->p_sigcatch) { nc = ffs((long)p->p_sigcatch); mask = sigmask(nc); p->p_sigcatch &= ~mask; if (sigprop[nc] & SA_IGNORE) { if (nc != SIGCONT) p->p_sigignore |= mask; p->p_siglist &= ~mask; } ps->ps_sigact[nc] = SIG_DFL; } /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. */ ps->ps_sigstk.ss_flags = SS_DISABLE; ps->ps_sigstk.ss_size = 0; ps->ps_sigstk.ss_sp = 0; ps->ps_flags = 0; } /* * Manipulate signal mask. * Note that we receive new mask, not pointer, * and return old mask as return value; * the library stub does the rest. 
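 *
 * A hedged sketch of such a stub (the actual libc code may differ;
 * NULL-set handling omitted for illustration):
 *
 *	old = syscall(SYS_sigprocmask, how, *set);
 *	if (oset != NULL)
 *		*oset = old;	previous mask comes back in p_retval[0]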
*/ #ifndef _SYS_SYSPROTO_H_ struct sigprocmask_args { int how; sigset_t mask; }; #endif int sigprocmask(p, uap) register struct proc *p; struct sigprocmask_args *uap; { int error = 0; p->p_retval[0] = p->p_sigmask; (void) splhigh(); switch (uap->how) { case SIG_BLOCK: p->p_sigmask |= uap->mask &~ sigcantmask; break; case SIG_UNBLOCK: p->p_sigmask &= ~uap->mask; break; case SIG_SETMASK: p->p_sigmask = uap->mask &~ sigcantmask; break; default: error = EINVAL; break; } (void) spl0(); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigpending_args { int dummy; }; #endif /* ARGSUSED */ int sigpending(p, uap) struct proc *p; struct sigpending_args *uap; { p->p_retval[0] = p->p_siglist; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Generalized interface signal handler, 4.3-compatible. */ #ifndef _SYS_SYSPROTO_H_ struct osigvec_args { int signum; struct sigvec *nsv; struct sigvec *osv; }; #endif /* ARGSUSED */ int osigvec(p, uap) struct proc *p; register struct osigvec_args *uap; { struct sigvec vec; register struct sigacts *ps = p->p_sigacts; register struct sigvec *sv; register int signum; int bit, error; signum = uap->signum; if (signum <= 0 || signum >= NSIG) return (EINVAL); sv = &vec; if (uap->osv) { *(sig_t *)&sv->sv_handler = ps->ps_sigact[signum]; sv->sv_mask = ps->ps_catchmask[signum]; bit = sigmask(signum); sv->sv_flags = 0; if ((ps->ps_sigonstack & bit) != 0) sv->sv_flags |= SV_ONSTACK; if ((ps->ps_sigintr & bit) != 0) sv->sv_flags |= SV_INTERRUPT; if ((ps->ps_sigreset & bit) != 0) sv->sv_flags |= SV_RESETHAND; if ((ps->ps_signodefer & bit) != 0) sv->sv_flags |= SV_NODEFER; #ifndef COMPAT_SUNOS if (signum == SIGCHLD && p->p_flag & P_NOCLDSTOP) sv->sv_flags |= SV_NOCLDSTOP; #endif if ((error = copyout((caddr_t)sv, (caddr_t)uap->osv, sizeof (vec)))) return (error); } if (uap->nsv) { if ((error = copyin((caddr_t)uap->nsv, (caddr_t)sv, sizeof (vec)))) return (error); if ((signum == SIGKILL || signum == SIGSTOP) && sv->sv_handler != SIG_DFL) return (EINVAL); #ifdef COMPAT_SUNOS sv->sv_flags |= SA_USERTRAMP; #endif sv->sv_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ setsigvec(p, signum, (struct sigaction *)sv); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigblock_args { int mask; }; #endif int osigblock(p, uap) register struct proc *p; struct osigblock_args *uap; { (void) splhigh(); p->p_retval[0] = p->p_sigmask; p->p_sigmask |= uap->mask &~ sigcantmask; (void) spl0(); return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigsetmask_args { int mask; }; #endif int osigsetmask(p, uap) struct proc *p; struct osigsetmask_args *uap; { (void) splhigh(); p->p_retval[0] = p->p_sigmask; p->p_sigmask = uap->mask &~ sigcantmask; (void) spl0(); return (0); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Suspend process until signal, providing mask to be set * in the meantime. Note nonstandard calling convention: * libc stub passes mask, not pointer, to save a copyin. */ #ifndef _SYS_SYSPROTO_H_ struct sigsuspend_args { sigset_t mask; }; #endif /* ARGSUSED */ int sigsuspend(p, uap) register struct proc *p; struct sigsuspend_args *uap; { register struct sigacts *ps = p->p_sigacts; /* * When returning from sigpause, we want * the old mask to be restored after the * signal handler has finished. Thus, we * save it here and mark the sigacts structure * to indicate this. 
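 *
 * postsig() is the consumer of SAS_OLDMASK: when delivering a caught
 * signal it restores ps_oldmask (the pre-sigsuspend mask) rather than
 * the temporary mask installed below.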
*/ ps->ps_oldmask = p->p_sigmask; ps->ps_flags |= SAS_OLDMASK; p->p_sigmask = uap->mask &~ sigcantmask; while (tsleep((caddr_t) ps, PPAUSE|PCATCH, "pause", 0) == 0) /* void */; /* always return EINTR rather than ERESTART... */ return (EINTR); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct osigstack_args { struct sigstack *nss; struct sigstack *oss; }; #endif /* ARGSUSED */ int osigstack(p, uap) struct proc *p; register struct osigstack_args *uap; { struct sigstack ss; struct sigacts *psp; int error = 0; psp = p->p_sigacts; ss.ss_sp = psp->ps_sigstk.ss_sp; ss.ss_onstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; if (uap->oss && (error = copyout((caddr_t)&ss, (caddr_t)uap->oss, sizeof (struct sigstack)))) return (error); if (uap->nss && (error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss))) == 0) { psp->ps_sigstk.ss_sp = ss.ss_sp; psp->ps_sigstk.ss_size = 0; psp->ps_sigstk.ss_flags |= ss.ss_onstack & SS_ONSTACK; psp->ps_flags |= SAS_ALTSTACK; } return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ #ifndef _SYS_SYSPROTO_H_ struct sigaltstack_args { struct sigaltstack *nss; struct sigaltstack *oss; }; #endif /* ARGSUSED */ int sigaltstack(p, uap) struct proc *p; register struct sigaltstack_args *uap; { struct sigacts *psp; struct sigaltstack ss; int error; psp = p->p_sigacts; if ((psp->ps_flags & SAS_ALTSTACK) == 0) psp->ps_sigstk.ss_flags |= SS_DISABLE; if (uap->oss && (error = copyout((caddr_t)&psp->ps_sigstk, (caddr_t)uap->oss, sizeof (struct sigaltstack)))) return (error); if (uap->nss == 0) return (0); if ((error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss)))) return (error); if (ss.ss_flags & SS_DISABLE) { if (psp->ps_sigstk.ss_flags & SS_ONSTACK) return (EINVAL); psp->ps_flags &= ~SAS_ALTSTACK; psp->ps_sigstk.ss_flags = ss.ss_flags; return (0); } if (ss.ss_size < MINSIGSTKSZ) return (ENOMEM); psp->ps_flags |= SAS_ALTSTACK; psp->ps_sigstk= ss; return (0); } /* * Common code for kill process group/broadcast kill. * cp is calling process. */ int killpg1(cp, signum, pgid, all) register struct proc *cp; int signum, pgid, all; { register struct proc *p; register struct pcred *pc = cp->p_cred; struct pgrp *pgrp; int nfound = 0; if (all) /* * broadcast */ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p == cp || !CANSIGNAL(cp, pc, p, signum)) continue; nfound++; if (signum) psignal(p, signum); } else { if (pgid == 0) /* * zero pgid means send to my process group. */ pgrp = cp->p_pgrp; else { pgrp = pgfind(pgid); if (pgrp == NULL) return (ESRCH); } for (p = pgrp->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p->p_stat == SZOMB || !CANSIGNAL(cp, pc, p, signum)) continue; nfound++; if (signum) psignal(p, signum); } } return (nfound ? 
0 : ESRCH); } #ifndef _SYS_SYSPROTO_H_ struct kill_args { int pid; int signum; }; #endif /* ARGSUSED */ int kill(cp, uap) register struct proc *cp; register struct kill_args *uap; { register struct proc *p; register struct pcred *pc = cp->p_cred; if ((u_int)uap->signum >= NSIG) return (EINVAL); if (uap->pid > 0) { /* kill single process */ if ((p = pfind(uap->pid)) == NULL) return (ESRCH); if (!CANSIGNAL(cp, pc, p, uap->signum)) return (EPERM); if (uap->signum) psignal(p, uap->signum); return (0); } switch (uap->pid) { case -1: /* broadcast signal */ return (killpg1(cp, uap->signum, 0, 1)); case 0: /* signal own process group */ return (killpg1(cp, uap->signum, 0, 0)); default: /* negative explicit process group */ return (killpg1(cp, uap->signum, -uap->pid, 0)); } /* NOTREACHED */ } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct okillpg_args { int pgid; int signum; }; #endif /* ARGSUSED */ int okillpg(p, uap) struct proc *p; register struct okillpg_args *uap; { if ((u_int)uap->signum >= NSIG) return (EINVAL); return (killpg1(p, uap->signum, uap->pgid, 0)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Send a signal to a process group. */ void gsignal(pgid, signum) int pgid, signum; { struct pgrp *pgrp; if (pgid && (pgrp = pgfind(pgid))) pgsignal(pgrp, signum, 0); } /* * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. */ void pgsignal(pgrp, signum, checkctty) struct pgrp *pgrp; int signum, checkctty; { register struct proc *p; if (pgrp) for (p = pgrp->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) if (checkctty == 0 || p->p_flag & P_CONTROLT) psignal(p, signum); } /* * Send a signal caused by a trap to the current process. * If it will be caught immediately, deliver it with correct code. * Otherwise, post it normally. */ void trapsignal(p, signum, code) struct proc *p; register int signum; u_long code; { register struct sigacts *ps = p->p_sigacts; int mask; mask = sigmask(signum); if ((p->p_flag & P_TRACED) == 0 && (p->p_sigcatch & mask) != 0 && (p->p_sigmask & mask) == 0) { p->p_stats->p_ru.ru_nsignals++; #ifdef KTRACE if (KTRPOINT(p, KTR_PSIG)) ktrpsig(p->p_tracep, signum, ps->ps_sigact[signum], p->p_sigmask, code); #endif (*p->p_sysent->sv_sendsig)(ps->ps_sigact[signum], signum, p->p_sigmask, code); p->p_sigmask |= ps->ps_catchmask[signum] | (mask & ~ps->ps_signodefer); if ((ps->ps_sigreset & mask) != 0) { /* * See setsigvec() for origin of this code. */ p->p_sigcatch &= ~mask; if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) p->p_sigignore |= mask; ps->ps_sigact[signum] = SIG_DFL; } } else { ps->ps_code = code; /* XXX for core dump/debugger */ ps->ps_sig = signum; /* XXX to verify code */ psignal(p, signum); } } /* * Send the signal to the process. If the signal has an action, the action * is usually performed by the target process rather than the caller; we add * the signal to the set of pending signals for the process. * * Exceptions: * o When a stop signal is sent to a sleeping process that takes the * default action, the process is stopped without awakening it. * o SIGCONT restarts stopped processes (or puts them back to sleep) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. 
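 *
 * Below, the chosen action is one of SIG_DFL, SIG_CATCH or SIG_HOLD
 * (blocked; leave pending), or the signal is discarded outright if
 * ignored; a traced process always takes the SIG_DFL path so that
 * its parent gets to see every signal.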
*/ void psignal(p, signum) register struct proc *p; register int signum; { register int s, prop; register sig_t action; int mask; if ((u_int)signum >= NSIG || signum == 0) { printf("psignal: signum %d\n", signum); panic("psignal signal number"); } mask = sigmask(signum); prop = sigprop[signum]; /* * If proc is traced, always give parent a chance; * if signal event is tracked by procfs, give *that* * a chance, as well. */ if ((p->p_flag & P_TRACED) || (p->p_stops & S_SIG)) action = SIG_DFL; else { /* * If the signal is being ignored, * then we forget about it immediately. * (Note: we don't set SIGCONT in p_sigignore, * and if it is set to SIG_IGN, * action will be SIG_DFL here.) */ if (p->p_sigignore & mask) return; if (p->p_sigmask & mask) action = SIG_HOLD; else if (p->p_sigcatch & mask) action = SIG_CATCH; else action = SIG_DFL; } if (p->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) && (p->p_flag & P_TRACED) == 0) p->p_nice = NZERO; if (prop & SA_CONT) p->p_siglist &= ~stopsigmask; if (prop & SA_STOP) { /* * If sending a tty stop signal to a member of an orphaned * process group, discard the signal here if the action * is default; don't stop the process below if sleeping, * and don't clear any pending SIGCONT. */ if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 && action == SIG_DFL) return; p->p_siglist &= ~contsigmask; } p->p_siglist |= mask; /* * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) return; s = splhigh(); switch (p->p_stat) { case SSLEEP: /* * If process is sleeping uninterruptibly * we can't interrupt the sleep... the signal will * be noticed when the process returns through * trap() or syscall(). */ if ((p->p_flag & P_SINTR) == 0) goto out; /* * Process is sleeping and traced... make it runnable * so it can discover the signal in issignal() and stop * for the parent. */ if (p->p_flag & P_TRACED) goto run; /* * If SIGCONT is default (or ignored) and process is * asleep, we are finished; the process should not * be awakened. */ if ((prop & SA_CONT) && action == SIG_DFL) { p->p_siglist &= ~mask; goto out; } /* * When a sleeping process receives a stop * signal, process immediately if possible. * All other (caught or default) signals * cause the process to run. */ if (prop & SA_STOP) { if (action != SIG_DFL) goto runfast; /* * If a child holding parent blocked, * stopping could cause deadlock. */ if (p->p_flag & P_PPWAIT) goto out; p->p_siglist &= ~mask; p->p_xstat = signum; if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) psignal(p->p_pptr, SIGCHLD); stop(p); goto out; } else goto runfast; /*NOTREACHED*/ case SSTOP: /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * Kill signal always sets processes running. */ if (signum == SIGKILL) goto runfast; if (prop & SA_CONT) { /* * If SIGCONT is default (or ignored), we continue the * process but don't leave the signal in p_siglist, as * it has no further action. If SIGCONT is held, we * continue the process and leave the signal in * p_siglist. If the process catches SIGCONT, let it * handle the signal itself. If it isn't waiting on * an event, then it goes back to run state. * Otherwise, process goes back to sleep state. 
*/ if (action == SIG_DFL) p->p_siglist &= ~mask; if (action == SIG_CATCH) goto runfast; if (p->p_wchan == 0) goto run; p->p_stat = SSLEEP; goto out; } if (prop & SA_STOP) { /* * Already stopped, don't need to stop again. * (If we did the shell could get confused.) */ p->p_siglist &= ~mask; /* take it away */ goto out; } /* * If process is sleeping interruptibly, then simulate a * wakeup so that when it is continued, it will be made * runnable and can look at the signal. But don't make * the process runnable, leave it stopped. */ if (p->p_wchan && p->p_flag & P_SINTR) unsleep(p); goto out; default: /* * SRUN, SIDL, SZOMB do nothing with the signal, * other than kicking ourselves if we are running. * It will either never be noticed, or noticed very soon. */ if (p == curproc) signotify(p); #ifdef SMP else if (p->p_stat == SRUN) forward_signal(p); #endif goto out; } /*NOTREACHED*/ runfast: /* * Raise priority to at least PUSER. */ if (p->p_priority > PUSER) p->p_priority = PUSER; run: setrunnable(p); out: splx(s); } /* * If the current process has received a signal (should be caught or cause * termination, should interrupt current syscall), return the signal number. * Stop signals with default action are processed immediately, then cleared; * they aren't returned. This is checked after each entry to the system for * a syscall or trap (though this can usually be done without calling issignal * by checking the pending signal masks in the CURSIG macro.) The normal call * sequence is * * while (signum = CURSIG(curproc)) * postsig(signum); */ int issignal(p) register struct proc *p; { register int signum, mask, prop; for (;;) { int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG); mask = p->p_siglist & ~p->p_sigmask; if (p->p_flag & P_PPWAIT) mask &= ~stopsigmask; if (mask == 0) /* no signal to send */ return (0); signum = ffs((long)mask); mask = sigmask(signum); prop = sigprop[signum]; STOPEVENT(p, S_SIG, signum); /* * We should see pending but ignored signals * only if P_TRACED was on when they were posted. */ if ((mask & p->p_sigignore) && (traced == 0)) { p->p_siglist &= ~mask; continue; } if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) { /* * If traced, always stop, and stay * stopped until released by the parent. */ p->p_xstat = signum; psignal(p->p_pptr, SIGCHLD); do { stop(p); mi_switch(); } while (!trace_req(p) && p->p_flag & P_TRACED); /* * If the traced bit got turned off, go back up * to the top to rescan signals. This ensures * that p_sig* and ps_sigact are consistent. */ if ((p->p_flag & P_TRACED) == 0) continue; /* * If parent wants us to take the signal, * then it will leave it in p->p_xstat; * otherwise we just look for signals again. */ p->p_siglist &= ~mask; /* clear the old signal */ signum = p->p_xstat; if (signum == 0) continue; /* * Put the new signal into p_siglist. If the * signal is being masked, look for other signals. */ mask = sigmask(signum); p->p_siglist |= mask; if (p->p_sigmask & mask) continue; } /* * Decide whether the signal should be returned. * Return the signal's number, or fall through * to clear it from the pending mask. */ switch ((int)(intptr_t)p->p_sigacts->ps_sigact[signum]) { case (int)SIG_DFL: /* * Don't take default actions on system processes. */ if (p->p_pid <= 1) { #ifdef DIAGNOSTIC /* * Are you sure you want to ignore SIGSEGV * in init? 
XXX */ printf("Process (pid %lu) got signal %d\n", (u_long)p->p_pid, signum); #endif break; /* == ignore */ } /* * If there is a pending stop signal to process * with default action, stop here, * then clear the signal. However, * if process is member of an orphaned * process group, ignore tty stop signals. */ if (prop & SA_STOP) { if (p->p_flag & P_TRACED || (p->p_pgrp->pg_jobc == 0 && prop & SA_TTYSTOP)) break; /* == ignore */ p->p_xstat = signum; stop(p); if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) psignal(p->p_pptr, SIGCHLD); mi_switch(); break; } else if (prop & SA_IGNORE) { /* * Except for SIGCONT, shouldn't get here. * Default action is to ignore; drop it. */ break; /* == ignore */ } else return (signum); /*NOTREACHED*/ case (int)SIG_IGN: /* * Masking above should prevent us ever trying * to take action on an ignored signal other * than SIGCONT, unless process is traced. */ if ((prop & SA_CONT) == 0 && (p->p_flag & P_TRACED) == 0) printf("issignal\n"); break; /* == ignore */ default: /* * This signal has an action, let * postsig() process it. */ return (signum); } p->p_siglist &= ~mask; /* take the signal! */ } /* NOTREACHED */ } /* * Put the argument process into the stopped state and notify the parent * via wakeup. Signals are handled elsewhere. The process must not be * on the run queue. */ void stop(p) register struct proc *p; { p->p_stat = SSTOP; p->p_flag &= ~P_WAITED; wakeup((caddr_t)p->p_pptr); } /* * Take the action for the specified signal * from the current set of pending signals. */ void postsig(signum) register int signum; { register struct proc *p = curproc; register struct sigacts *ps = p->p_sigacts; register sig_t action; int code, mask, returnmask; #ifdef DIAGNOSTIC if (signum == 0) panic("postsig"); #endif mask = sigmask(signum); p->p_siglist &= ~mask; action = ps->ps_sigact[signum]; #ifdef KTRACE if (KTRPOINT(p, KTR_PSIG)) ktrpsig(p->p_tracep, signum, action, ps->ps_flags & SAS_OLDMASK ? ps->ps_oldmask : p->p_sigmask, 0); #endif STOPEVENT(p, S_SIG, signum); if (action == SIG_DFL) { /* * Default action, where the default is to kill * the process. (Other cases were ignored above.) */ sigexit(p, signum); /* NOTREACHED */ } else { /* * If we get here, the signal must be caught. */ #ifdef DIAGNOSTIC if (action == SIG_IGN || (p->p_sigmask & mask)) panic("postsig action"); #endif /* * Set the new mask value and also defer further * occurrences of this signal. * * Special case: user has done a sigpause. Here the * current mask is not of interest, but rather the * mask from before the sigpause is what we want * restored after the signal processing is completed. */ (void) splhigh(); if (ps->ps_flags & SAS_OLDMASK) { returnmask = ps->ps_oldmask; ps->ps_flags &= ~SAS_OLDMASK; } else returnmask = p->p_sigmask; p->p_sigmask |= ps->ps_catchmask[signum] | (mask & ~ps->ps_signodefer); if ((ps->ps_sigreset & mask) != 0) { /* * See setsigvec() for origin of this code. */ p->p_sigcatch &= ~mask; if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) p->p_sigignore |= mask; ps->ps_sigact[signum] = SIG_DFL; } (void) spl0(); p->p_stats->p_ru.ru_nsignals++; if (ps->ps_sig != signum) { code = 0; } else { code = ps->ps_code; ps->ps_code = 0; ps->ps_sig = 0; } (*p->p_sysent->sv_sendsig)(action, signum, returnmask, code); } } /* * Kill the current process for stated reason. */ void killproc(p, why) struct proc *p; char *why; { log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_cred && p->p_ucred ?
p->p_ucred->cr_uid : -1, why); psignal(p, SIGKILL); } /* * Force the current process to exit with the specified signal, dumping core * if appropriate. We bypass the normal tests for masked and caught signals, * allowing unrecoverable failures to terminate the process without changing * signal state. Mark the accounting record with the signal termination. * If dumping core, save the signal number for the debugger. Calls exit and * does not return. */ void sigexit(p, signum) register struct proc *p; int signum; { p->p_acflag |= AXSIG; if (sigprop[signum] & SA_CORE) { p->p_sigacts->ps_sig = signum; /* * Log signals which would cause core dumps * (Log as LOG_INFO to appease those who don't want * these messages.) * XXX : Todo, as well as euid, write out ruid too */ if (p->p_sysent->sv_coredump != NULL && (*p->p_sysent->sv_coredump)(p) == 0) signum |= WCOREFLAG; if (kern_logsigexit) log(LOG_INFO, "pid %d (%s), uid %d: exited on signal %d%s\n", p->p_pid, p->p_comm, p->p_cred && p->p_ucred ? p->p_ucred->cr_uid : -1, signum &~ WCOREFLAG, signum & WCOREFLAG ? " (core dumped)" : ""); } exit1(p, W_EXITCODE(0, signum)); /* NOTREACHED */ } static char corefilename[MAXPATHLEN+1] = {"%N.core"}; SYSCTL_STRING(_kern, OID_AUTO, corefile, CTLFLAG_RW, corefilename, sizeof(corefilename), "process corefile name format string"); /* * expand_name(name, uid, pid) * Expand the name described in corefilename, using name, uid, and pid. * corefilename is a printf-like string, with three format specifiers: * %N name of process ("name") * %P process id (pid) * %U user id (uid) * For example, "%N.core" is the default; they can be disabled completely * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". * This is controlled by the sysctl variable kern.corefile (see above). */ char * expand_name(name, uid, pid) const char *name; int uid; int pid; { char *temp; char buf[11]; /* Buffer for pid/uid -- max 4B */ int i, n; char *format = corefilename; temp = malloc(MAXPATHLEN + 3, M_TEMP, M_NOWAIT); bzero(temp, MAXPATHLEN+3); for (i = 0, n = 0; i < MAXPATHLEN && format[i]; i++) { int l; switch (format[i]) { case '%': /* Format character */ i++; switch (format[i]) { case '%': temp[n++] = '%'; break; case 'N': /* process name */ l = strlen(name); if ((n + l) > MAXPATHLEN) { log(LOG_ERR, "pid %d (%s), uid (%d): Path `%s%s' is too long\n", pid, name, uid, temp, name); free(temp, M_TEMP); return NULL; } memcpy(temp+n, name, l); n += l; break; case 'P': /* process id */ sprintf(buf, "%u", pid); l = strlen(buf); if ((n + l) > MAXPATHLEN) { log(LOG_ERR, "pid %d (%s), uid (%d): Path `%s%s' is too long\n", pid, name, uid, temp, name); free(temp, M_TEMP); return NULL; } memcpy(temp+n, buf, l); n += l; break; case 'U': /* user id */ sprintf(buf, "%u", uid); l = strlen(buf); if ((n + l) > MAXPATHLEN) { log(LOG_ERR, "pid %d (%s), uid (%d): Path `%s%s' is too long\n", pid, name, uid, temp, name); free(temp, M_TEMP); return NULL; } memcpy(temp+n, buf, l); n += l; break; default: log(LOG_ERR, "Unknown format character %c in `%s'\n", format[i], format); } break; default: temp[n++] = format[i]; } } return temp; } /* * Nonexistent system call-- signal process (may want to handle it). * Flag error in case process won't see signal immediately (blocked or ignored). 
*/ #ifndef _SYS_SYSPROTO_H_ struct nosys_args { int dummy; }; #endif /* ARGSUSED */ int nosys(p, args) struct proc *p; struct nosys_args *args; { psignal(p, SIGSYS); return (EINVAL); +} + +/* + * Send a SIGIO or SIGURG signal to a process or process group using + * stored credentials rather than those of the current process. + */ +void +pgsigio(sigio, signum, checkctty) + struct sigio *sigio; + int signum, checkctty; +{ + if (sigio == NULL) + return; + + if (sigio->sio_pgid > 0) { + if (CANSIGIO(sigio->sio_ruid, sigio->sio_ucred, + sigio->sio_proc)) + psignal(sigio->sio_proc, signum); + } else if (sigio->sio_pgid < 0) { + struct proc *p; + + for (p = sigio->sio_pgrp->pg_members.lh_first; p != NULL; + p = p->p_pglist.le_next) + if (CANSIGIO(sigio->sio_ruid, sigio->sio_ucred, p) && + (checkctty == 0 || (p->p_flag & P_CONTROLT))) + psignal(p, signum); + } } Index: head/sys/kern/subr_log.c =================================================================== --- head/sys/kern/subr_log.c (revision 41085) +++ head/sys/kern/subr_log.c (revision 41086) @@ -1,270 +1,276 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)subr_log.c 8.1 (Berkeley) 6/10/93 - * $Id: subr_log.c,v 1.29 1998/05/28 09:30:20 phk Exp $ + * $Id: subr_log.c,v 1.30 1998/06/07 17:11:38 dfr Exp $ */ /* * Error log buffer for kernel printf's.
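 *
 * Async notification now goes through the generic sigio layer: the
 * owner set via FIOSETOWN (or the deprecated TIOCSPGRP) is recorded
 * with fsetown() and signalled with pgsigio(), rather than this
 * driver caching a raw pid/pgid itself.  Illustrative reader-side
 * setup (a hedged sketch; syslogd is the usual consumer):
 *
 *	fcntl(logfd, F_SETOWN, getpid());	route SIGIO to us
 *	on = 1;
 *	ioctl(logfd, FIOASYNC, &on);		SIGIO on new messages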
*/ #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include +#include #ifdef DEVFS #include #endif /*DEVFS*/ #define LOG_RDPRI (PZERO + 1) #define LOG_ASYNC 0x04 #define LOG_RDWAIT 0x08 static d_open_t logopen; static d_close_t logclose; static d_read_t logread; static d_ioctl_t logioctl; static d_poll_t logpoll; #define CDEV_MAJOR 7 static struct cdevsw log_cdevsw = { logopen, logclose, logread, nowrite, /*7*/ logioctl, nostop, nullreset, nodevtotty,/* klog */ logpoll, nommap, NULL, "log", NULL, -1 }; static struct logsoftc { int sc_state; /* see above for possibilities */ struct selinfo sc_selp; /* process waiting on select call */ - int sc_pgid; /* process/group for async I/O */ + struct sigio *sc_sigio; /* information for SIGIO */ } logsoftc; int log_open; /* also used in log() */ /*ARGSUSED*/ static int logopen(dev, flags, mode, p) dev_t dev; int flags, mode; struct proc *p; { if (log_open) return (EBUSY); log_open = 1; - logsoftc.sc_pgid = p->p_pid; /* signal process only */ + fsetown(p->p_pid, &logsoftc.sc_sigio); /* signal process only */ return (0); } /*ARGSUSED*/ static int logclose(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { log_open = 0; logsoftc.sc_state = 0; + funsetown(logsoftc.sc_sigio); return (0); } /*ARGSUSED*/ static int logread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { register struct msgbuf *mbp = msgbufp; register long l; register int s; int error = 0; s = splhigh(); while (mbp->msg_bufr == mbp->msg_bufx) { if (flag & IO_NDELAY) { splx(s); return (EWOULDBLOCK); } logsoftc.sc_state |= LOG_RDWAIT; if ((error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH, "klog", 0))) { splx(s); return (error); } } splx(s); logsoftc.sc_state &= ~LOG_RDWAIT; while (uio->uio_resid > 0) { l = mbp->msg_bufx - mbp->msg_bufr; if (l < 0) l = mbp->msg_size - mbp->msg_bufr; l = min(l, uio->uio_resid); if (l == 0) break; error = uiomove((caddr_t)msgbufp->msg_ptr + mbp->msg_bufr, (int)l, uio); if (error) break; mbp->msg_bufr += l; if (mbp->msg_bufr >= mbp->msg_size) mbp->msg_bufr = 0; } return (error); } /*ARGSUSED*/ static int logpoll(dev, events, p) dev_t dev; int events; struct proc *p; { int s; int revents = 0; s = splhigh(); if (events & (POLLIN | POLLRDNORM)) if (msgbufp->msg_bufr != msgbufp->msg_bufx) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &logsoftc.sc_selp); splx(s); return (revents); } void logwakeup() { struct proc *p; if (!log_open) return; selwakeup(&logsoftc.sc_selp); - if (logsoftc.sc_state & LOG_ASYNC) { - if (logsoftc.sc_pgid < 0) - gsignal(-logsoftc.sc_pgid, SIGIO); - else if ((p = pfind(logsoftc.sc_pgid))) - psignal(p, SIGIO); - } + if ((logsoftc.sc_state & LOG_ASYNC) && logsoftc.sc_sigio != NULL) + pgsigio(logsoftc.sc_sigio, SIGIO, 0); if (logsoftc.sc_state & LOG_RDWAIT) { wakeup((caddr_t)msgbufp); logsoftc.sc_state &= ~LOG_RDWAIT; } } /*ARGSUSED*/ static int logioctl(dev, com, data, flag, p) dev_t dev; u_long com; caddr_t data; int flag; struct proc *p; { long l; int s; switch (com) { /* return number of characters immediately available */ case FIONREAD: s = splhigh(); l = msgbufp->msg_bufx - msgbufp->msg_bufr; splx(s); if (l < 0) l += msgbufp->msg_size; *(int *)data = l; break; case FIONBIO: break; case FIOASYNC: if (*(int *)data) logsoftc.sc_state |= LOG_ASYNC; else logsoftc.sc_state &= ~LOG_ASYNC; break; - case TIOCSPGRP: - logsoftc.sc_pgid = *(int *)data; + case FIOSETOWN: + return (fsetown(*(int *)data, &logsoftc.sc_sigio)); + + case FIOGETOWN: + *(int 
*)data = fgetown(logsoftc.sc_sigio); break; + /* This is deprecated, FIOSETOWN should be used instead. */ + case TIOCSPGRP: + return (fsetown(-(*(int *)data), &logsoftc.sc_sigio)); + + /* This is deprecated, FIOGETOWN should be used instead */ case TIOCGPGRP: - *(int *)data = logsoftc.sc_pgid; + *(int *)data = -fgetown(logsoftc.sc_sigio); break; default: return (ENOTTY); } return (0); } static int log_devsw_installed; #ifdef DEVFS static void *log_devfs_token; #endif static void log_drvinit __P((void *unused)); static void log_drvinit(unused) void *unused; { dev_t dev; if( ! log_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&log_cdevsw,NULL); log_devsw_installed = 1; #ifdef DEVFS log_devfs_token = devfs_add_devswf(&log_cdevsw, 0, DV_CHR, UID_ROOT, GID_WHEEL, 0600, "klog"); #endif } } SYSINIT(logdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,log_drvinit,NULL) Index: head/sys/kern/sys_generic.c =================================================================== --- head/sys/kern/sys_generic.c (revision 41085) +++ head/sys/kern/sys_generic.c (revision 41086) @@ -1,901 +1,870 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 - * $Id: sys_generic.c,v 1.40 1998/08/24 08:39:38 dfr Exp $ + * $Id: sys_generic.c,v 1.41 1998/09/05 14:30:11 bde Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); MALLOC_DEFINE(M_IOV, "iov", "large iov's"); static int pollscan __P((struct proc *, struct pollfd *, int)); static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); /* * Read system call. */ #ifndef _SYS_SYSPROTO_H_ struct read_args { int fd; void *buf; size_t nbyte; }; #endif /* ARGSUSED */ int read(p, uap) struct proc *p; register struct read_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; struct iovec aiov; long cnt, error = 0; #ifdef KTRACE struct iovec ktriov; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FREAD) == 0) return (EBADF); aiov.iov_base = (caddr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; if (uap->nbyte > INT_MAX) return (EINVAL); auio.uio_resid = uap->nbyte; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) ktriov = aiov; #endif cnt = uap->nbyte; if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))) if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; cnt -= auio.uio_resid; #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO) && error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error); #endif p->p_retval[0] = cnt; return (error); } /* * Scatter read system call. 
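 *
 * The user iovec array is validated before it is trusted: iovlen is
 * computed up front but, as noted below, not used until iovcnt has
 * been bounded by UIO_MAXIOV, and the per-segment lengths are then
 * summed with an overflow check against INT_MAX.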
*/ #ifndef _SYS_SYSPROTO_H_ struct readv_args { int fd; struct iovec *iovp; u_int iovcnt; }; #endif int readv(p, uap) struct proc *p; register struct readv_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; register struct iovec *iov; struct iovec *needfree; struct iovec aiov[UIO_SMALLIOV]; long i, cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FREAD) == 0) return (EBADF); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) return (EINVAL); MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); needfree = iov; } else { iov = aiov; needfree = NULL; } auio.uio_iov = iov; auio.uio_iovcnt = uap->iovcnt; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_offset = -1; if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) goto done; auio.uio_resid = 0; for (i = 0; i < uap->iovcnt; i++) { if (iov->iov_len > INT_MAX - auio.uio_resid) { error = EINVAL; goto done; } auio.uio_resid += iov->iov_len; iov++; } #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif cnt = auio.uio_resid; if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))) if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; cnt -= auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov, cnt, error); FREE(ktriov, M_TEMP); } #endif p->p_retval[0] = cnt; done: if (needfree) FREE(needfree, M_IOV); return (error); } /* * Write system call */ #ifndef _SYS_SYSPROTO_H_ struct write_args { int fd; const void *buf; size_t nbyte; }; #endif int write(p, uap) struct proc *p; register struct write_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; struct iovec aiov; long cnt, error = 0; #ifdef KTRACE struct iovec ktriov; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FWRITE) == 0) return (EBADF); aiov.iov_base = (caddr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; if (uap->nbyte > INT_MAX) return (EINVAL); auio.uio_resid = uap->nbyte; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) ktriov = aiov; #endif cnt = uap->nbyte; if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } cnt -= auio.uio_resid; #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO) && error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktriov, cnt, error); #endif p->p_retval[0] = cnt; return (error); } /* * Gather write system call */ #ifndef _SYS_SYSPROTO_H_ struct writev_args { int fd; struct iovec *iovp; u_int iovcnt; }; #endif int writev(p, uap) struct proc *p; register struct writev_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; register struct iovec *iov; struct iovec *needfree; struct iovec aiov[UIO_SMALLIOV]; long i, cnt, 
error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FWRITE) == 0) return (EBADF); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) return (EINVAL); MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); needfree = iov; } else { iov = aiov; needfree = NULL; } auio.uio_iov = iov; auio.uio_iovcnt = uap->iovcnt; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_offset = -1; if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) goto done; auio.uio_resid = 0; for (i = 0; i < uap->iovcnt; i++) { if (iov->iov_len > INT_MAX - auio.uio_resid) { error = EINVAL; goto done; } auio.uio_resid += iov->iov_len; iov++; } #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif cnt = auio.uio_resid; if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } cnt -= auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, ktriov, cnt, error); FREE(ktriov, M_TEMP); } #endif p->p_retval[0] = cnt; done: if (needfree) FREE(needfree, M_IOV); return (error); } /* * Ioctl system call */ #ifndef _SYS_SYSPROTO_H_ struct ioctl_args { int fd; u_long com; caddr_t data; }; #endif /* ARGSUSED */ int ioctl(p, uap) struct proc *p; register struct ioctl_args *uap; { register struct file *fp; register struct filedesc *fdp; register u_long com; int error; register u_int size; caddr_t data, memp; int tmp; #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; fdp = p->p_fd; if ((u_int)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if ((fp->f_flag & (FREAD | FWRITE)) == 0) return (EBADF); switch (com = uap->com) { case FIONCLEX: fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; return (0); case FIOCLEX: fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; return (0); } /* * Interpret high order word to find amount of data to be * copied to/from the user's address space. */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) return (ENOTTY); memp = NULL; if (size > sizeof (stkbuf)) { memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); data = memp; } else data = stkbuf; if (com&IOC_IN) { if (size) { error = copyin(uap->data, data, (u_int)size); if (error) { if (memp) free(memp, M_IOCTLOPS); return (error); } } else *(caddr_t *)data = uap->data; } else if ((com&IOC_OUT) && size) /* * Zero the buffer so the user always * gets back something deterministic. 
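 * Otherwise stale kernel stack or malloc contents could leak to
 * userland through an IOC_OUT command whose handler fills in fewer
 * than `size' bytes.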
*/ bzero(data, size); else if (com&IOC_VOID) *(caddr_t *)data = uap->data; switch (com) { case FIONBIO: if ((tmp = *(int *)data)) fp->f_flag |= FNONBLOCK; else fp->f_flag &= ~FNONBLOCK; error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); break; case FIOASYNC: if ((tmp = *(int *)data)) fp->f_flag |= FASYNC; else fp->f_flag &= ~FASYNC; error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); - break; - - case FIOSETOWN: - tmp = *(int *)data; - if (fp->f_type == DTYPE_SOCKET) { - ((struct socket *)fp->f_data)->so_pgid = tmp; - error = 0; - break; - } - if (tmp <= 0) { - tmp = -tmp; - } else { - struct proc *p1 = pfind(tmp); - if (p1 == 0) { - error = ESRCH; - break; - } - tmp = p1->p_pgrp->pg_id; - } - error = (*fp->f_ops->fo_ioctl) - (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); - break; - - case FIOGETOWN: - if (fp->f_type == DTYPE_SOCKET) { - error = 0; - *(int *)data = ((struct socket *)fp->f_data)->so_pgid; - break; - } - error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p); - *(int *)data = -*(int *)data; break; default: error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); /* * Copy any data to user, size was * already set and checked above. */ if (error == 0 && (com&IOC_OUT) && size) error = copyout(data, uap->data, (u_int)size); break; } if (memp) free(memp, M_IOCTLOPS); return (error); } static int nselcoll; int selwait; /* * Select system call. */ #ifndef _SYS_SYSPROTO_H_ struct select_args { int nd; fd_set *in, *ou, *ex; struct timeval *tv; }; #endif int select(p, uap) register struct proc *p; register struct select_args *uap; { /* * The magic 2048 here is chosen to be just enough for FD_SETSIZE * infds with the new FD_SETSIZE of 1024, and more than enough for * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE * of 256. */ fd_mask s_selbits[howmany(2048, NFDBITS)]; fd_mask *ibits[3], *obits[3], *selbits, *sbp; struct timeval atv, rtv, ttv; int s, ncoll, error, timo; u_int nbufbytes, ncpbytes, nfdbits; if (uap->nd < 0) return (EINVAL); if (uap->nd > p->p_fd->fd_nfiles) uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ /* * Allocate just enough bits for the non-null fd_sets. Use the * preallocated auto buffer if possible. */ nfdbits = roundup(uap->nd, NFDBITS); ncpbytes = nfdbits / NBBY; nbufbytes = 0; if (uap->in != NULL) nbufbytes += 2 * ncpbytes; if (uap->ou != NULL) nbufbytes += 2 * ncpbytes; if (uap->ex != NULL) nbufbytes += 2 * ncpbytes; if (nbufbytes <= sizeof s_selbits) selbits = &s_selbits[0]; else selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); /* * Assign pointers into the bit buffers and fetch the input bits. * Put the output buffers together so that they can be bzeroed * together. 
*/ sbp = selbits; #define getbits(name, x) \ do { \ if (uap->name == NULL) \ ibits[x] = NULL; \ else { \ ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ obits[x] = sbp; \ sbp += ncpbytes / sizeof *sbp; \ error = copyin(uap->name, ibits[x], ncpbytes); \ if (error != 0) \ goto done; \ } \ } while (0) getbits(in, 0); getbits(ou, 1); getbits(ex, 2); #undef getbits if (nbufbytes != 0) bzero(selbits, nbufbytes / 2); if (uap->tv) { error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); if (error) goto done; if (itimerfix(&atv)) { error = EINVAL; goto done; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); } else atv.tv_sec = 0; timo = 0; retry: ncoll = nselcoll; p->p_flag |= P_SELECT; error = selscan(p, ibits, obits, uap->nd); if (error || p->p_retval[0]) goto done; if (atv.tv_sec) { getmicrouptime(&rtv); if (timevalcmp(&rtv, &atv, >=)) goto done; ttv = atv; timevalsub(&ttv, &rtv); timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } s = splhigh(); if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { splx(s); goto retry; } p->p_flag &= ~P_SELECT; error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); splx(s); if (error == 0) goto retry; done: p->p_flag &= ~P_SELECT; /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; #define putbits(name, x) \ if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ error = error2; if (error == 0) { int error2; putbits(in, 0); putbits(ou, 1); putbits(ex, 2); #undef putbits } if (selbits != &s_selbits[0]) free(selbits, M_SELECT); return (error); } static int selscan(p, ibits, obits, nfd) struct proc *p; fd_mask **ibits, **obits; int nfd; { register struct filedesc *fdp = p->p_fd; register int msk, i, j, fd; register fd_mask bits; struct file *fp; int n = 0; /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; for (msk = 0; msk < 3; msk++) { if (ibits[msk] == NULL) continue; for (i = 0; i < nfd; i += NFDBITS) { bits = ibits[msk][i/NFDBITS]; while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); fp = fdp->fd_ofiles[fd]; if (fp == NULL) return (EBADF); if ((*fp->f_ops->fo_poll)(fp, flag[msk], fp->f_cred, p)) { obits[msk][(fd)/NFDBITS] |= (1 << ((fd) % NFDBITS)); n++; } } } } p->p_retval[0] = n; return (0); } /* * Poll system call. 
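 *
 * Unlike select() above, which fails the whole call with EBADF,
 * pollscan() below flags a bad descriptor per-entry by setting
 * POLLNVAL in that entry's revents.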
*/ #ifndef _SYS_SYSPROTO_H_ struct poll_args { struct pollfd *fds; u_int nfds; int timeout; }; #endif int poll(p, uap) register struct proc *p; register struct poll_args *uap; { caddr_t bits; char smallbits[32 * sizeof(struct pollfd)]; struct timeval atv, rtv, ttv; int s, ncoll, error = 0, timo; size_t ni; if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { /* forgiving; slightly wrong */ SCARG(uap, nfds) = p->p_fd->fd_nfiles; } ni = SCARG(uap, nfds) * sizeof(struct pollfd); if (ni > sizeof(smallbits)) bits = malloc(ni, M_TEMP, M_WAITOK); else bits = smallbits; error = copyin(SCARG(uap, fds), bits, ni); if (error) goto done; if (SCARG(uap, timeout) != INFTIM) { atv.tv_sec = SCARG(uap, timeout) / 1000; atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; if (itimerfix(&atv)) { error = EINVAL; goto done; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); } else atv.tv_sec = 0; timo = 0; retry: ncoll = nselcoll; p->p_flag |= P_SELECT; error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); if (error || p->p_retval[0]) goto done; if (atv.tv_sec) { getmicrouptime(&rtv); if (timevalcmp(&rtv, &atv, >=)) goto done; ttv = atv; timevalsub(&ttv, &rtv); timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } s = splhigh(); if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { splx(s); goto retry; } p->p_flag &= ~P_SELECT; error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); splx(s); if (error == 0) goto retry; done: p->p_flag &= ~P_SELECT; /* poll is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; if (error == 0) { error = copyout(bits, SCARG(uap, fds), ni); if (error) goto out; } out: if (ni > sizeof(smallbits)) free(bits, M_TEMP); return (error); } static int pollscan(p, fds, nfd) struct proc *p; struct pollfd *fds; int nfd; { register struct filedesc *fdp = p->p_fd; int i; struct file *fp; int n = 0; for (i = 0; i < nfd; i++, fds++) { if ((u_int)fds->fd >= fdp->fd_nfiles) { fds->revents = POLLNVAL; n++; } else { fp = fdp->fd_ofiles[fds->fd]; if (fp == 0) { fds->revents = POLLNVAL; n++; } else { /* * Note: backend also returns POLLHUP and * POLLERR if appropriate. */ fds->revents = (*fp->f_ops->fo_poll)(fp, fds->events, fp->f_cred, p); if (fds->revents != 0) n++; } } } p->p_retval[0] = n; return (0); } /* * OpenBSD poll system call. * XXX this isn't quite a true representation.. OpenBSD uses select ops. */ #ifndef _SYS_SYSPROTO_H_ struct openbsd_poll_args { struct pollfd *fds; u_int nfds; int timeout; }; #endif int openbsd_poll(p, uap) register struct proc *p; register struct openbsd_poll_args *uap; { return (poll(p, (struct poll_args *)uap)); } /*ARGSUSED*/ int seltrue(dev, events, p) dev_t dev; int events; struct proc *p; { return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Record a select request. */ void selrecord(selector, sip) struct proc *selector; struct selinfo *sip; { struct proc *p; pid_t mypid; mypid = selector->p_pid; if (sip->si_pid == mypid) return; if (sip->si_pid && (p = pfind(sip->si_pid)) && p->p_wchan == (caddr_t)&selwait) sip->si_flags |= SI_COLL; else sip->si_pid = mypid; } /* * Do a wakeup when a selectable event occurs. 
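 *
 * If more than one process selected on the same descriptor, the
 * selinfo is marked SI_COLL; in that case nselcoll is bumped and
 * everything sleeping on selwait is woken, and the processes that
 * did not win the event simply rescan.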
*/ void selwakeup(sip) register struct selinfo *sip; { register struct proc *p; int s; if (sip->si_pid == 0) return; if (sip->si_flags & SI_COLL) { nselcoll++; sip->si_flags &= ~SI_COLL; wakeup((caddr_t)&selwait); } p = pfind(sip->si_pid); sip->si_pid = 0; if (p != NULL) { s = splhigh(); if (p->p_wchan == (caddr_t)&selwait) { if (p->p_stat == SSLEEP) setrunnable(p); else unsleep(p); } else if (p->p_flag & P_SELECT) p->p_flag &= ~P_SELECT; splx(s); } } Index: head/sys/kern/sys_pipe.c =================================================================== --- head/sys/kern/sys_pipe.c (revision 41085) +++ head/sys/kern/sys_pipe.c (revision 41086) @@ -1,1100 +1,1104 @@ /* * Copyright (c) 1996 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: sys_pipe.c,v 1.43 1998/10/13 08:24:40 dg Exp $ + * $Id: sys_pipe.c,v 1.44 1998/10/28 13:36:58 dg Exp $ */ /* * This file contains a high-performance replacement for the socket-based * pipes scheme originally used in FreeBSD/4.4Lite. It does not support * all features of sockets, but does do everything that pipes normally * do. */ /* * This code has two modes of operation, a small write mode and a large * write mode. The small write mode acts like conventional pipes with * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and * the receiving process can copy it directly from the pages in the sending * process. * * If the sending process receives a signal, it is possible that it will * go away, and certainly its address space can change, because control * is returned back to the user-mode side. In that case, the pipe code * arranges to copy the buffer supplied by the user process, to a pageable * kernel buffer, and the receiving process will grab the data from the * pageable kernel buffer. Since signals don't happen all that often, * the copy operation is normally eliminated. * * The constant PIPE_MINDIRECT is chosen to make sure that buffering will * happen for small transfers so that the system will not spend all of * its time context switching. PIPE_SIZE is constrained by the * amount of kernel virtual memory. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Use this define if you want to disable *fancy* VM things. Expect an * approx 30% decrease in transfer rate. This could be useful for * NetBSD or OpenBSD. 
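 * Defining it trades the page-loaning direct-write path below for
 * plain copies through the pageable kernel buffer.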
*/ /* #define PIPE_NODIRECT */ /* * interfaces to the outside world */ static int pipe_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int pipe_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int pipe_close __P((struct file *fp, struct proc *p)); static int pipe_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); static struct fileops pipeops = { pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_close }; /* * Default pipe buffer size(s), this can be kind-of large now because pipe * space is pageable. The pipe code will try to maintain locality of * reference for performance reasons, so small amounts of outstanding I/O * will not wipe the cache. */ #define MINPIPESIZE (PIPE_SIZE/3) #define MAXPIPESIZE (2*PIPE_SIZE/3) /* * Maximum amount of kva for pipes -- this is kind-of a soft limit, but * is there so that on large systems, we don't exhaust it. */ #define MAXPIPEKVA (8*1024*1024) /* * Limit for direct transfers, we cannot, of course limit * the amount of kva for pipes in general though. */ #define LIMITPIPEKVA (16*1024*1024) /* * Limit the number of "big" pipes */ #define LIMITBIGPIPES 32 static int nbigpipe; static int amountpipekva; static void pipeclose __P((struct pipe *cpipe)); static void pipeinit __P((struct pipe *cpipe)); static __inline int pipelock __P((struct pipe *cpipe, int catch)); static __inline void pipeunlock __P((struct pipe *cpipe)); static __inline void pipeselwakeup __P((struct pipe *cpipe)); #ifndef PIPE_NODIRECT static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); static void pipe_destroy_write_buffer __P((struct pipe *wpipe)); static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); static void pipe_clone_write_buffer __P((struct pipe *wpipe)); #endif static void pipespace __P((struct pipe *cpipe)); static vm_zone_t pipe_zone; /* * The pipe system call for the DTYPE_PIPE type of pipes */ /* ARGSUSED */ int pipe(p, uap) struct proc *p; struct pipe_args /* { int dummy; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct file *rf, *wf; struct pipe *rpipe, *wpipe; int fd, error; if (pipe_zone == NULL) pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); rpipe = zalloc( pipe_zone); pipeinit(rpipe); rpipe->pipe_state |= PIPE_DIRECTOK; wpipe = zalloc( pipe_zone); pipeinit(wpipe); wpipe->pipe_state |= PIPE_DIRECTOK; error = falloc(p, &rf, &fd); if (error) goto free2; p->p_retval[0] = fd; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; rf->f_ops = &pipeops; rf->f_data = (caddr_t)rpipe; error = falloc(p, &wf, &fd); if (error) goto free3; wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_PIPE; wf->f_ops = &pipeops; wf->f_data = (caddr_t)wpipe; p->p_retval[1] = fd; rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; return (0); free3: ffree(rf); fdp->fd_ofiles[p->p_retval[0]] = 0; free2: (void)pipeclose(wpipe); (void)pipeclose(rpipe); return (error); } /* * Allocate kva for pipe circular buffer, the space is pageable */ static void pipespace(cpipe) struct pipe *cpipe; { int npages, error; npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; /* * Create an object, I don't like the idea of paging to/from * kernel_object. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
*/ cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); /* * Insert the object into the kernel map, and allocate kva for it. * The map entry is, by default, pageable. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. */ error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, (vm_offset_t *) &cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size, 1, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); amountpipekva += cpipe->pipe_buffer.size; } /* * initialize and allocate VM and memory for pipe */ static void pipeinit(cpipe) struct pipe *cpipe; { cpipe->pipe_buffer.in = 0; cpipe->pipe_buffer.out = 0; cpipe->pipe_buffer.cnt = 0; cpipe->pipe_buffer.size = PIPE_SIZE; /* Buffer kva gets dynamically allocated */ cpipe->pipe_buffer.buffer = NULL; /* cpipe->pipe_buffer.object = invalid */ cpipe->pipe_state = 0; cpipe->pipe_peer = NULL; cpipe->pipe_busy = 0; getnanotime(&cpipe->pipe_ctime); cpipe->pipe_atime = cpipe->pipe_ctime; cpipe->pipe_mtime = cpipe->pipe_ctime; bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); - cpipe->pipe_pgid = NO_PID; #ifndef PIPE_NODIRECT /* * pipe data structure initializations to support direct pipe I/O */ cpipe->pipe_map.cnt = 0; cpipe->pipe_map.kva = 0; cpipe->pipe_map.pos = 0; cpipe->pipe_map.npages = 0; /* cpipe->pipe_map.ms[] = invalid */ #endif } /* * lock a pipe for I/O, blocking other access */ static __inline int pipelock(cpipe, catch) struct pipe *cpipe; int catch; { int error; while (cpipe->pipe_state & PIPE_LOCK) { cpipe->pipe_state |= PIPE_LWANT; if (error = tsleep( cpipe, catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) { return error; } } cpipe->pipe_state |= PIPE_LOCK; return 0; } /* * unlock a pipe I/O lock */ static __inline void pipeunlock(cpipe) struct pipe *cpipe; { cpipe->pipe_state &= ~PIPE_LOCK; if (cpipe->pipe_state & PIPE_LWANT) { cpipe->pipe_state &= ~PIPE_LWANT; wakeup(cpipe); } } static __inline void pipeselwakeup(cpipe) struct pipe *cpipe; { struct proc *p; if (cpipe->pipe_state & PIPE_SEL) { cpipe->pipe_state &= ~PIPE_SEL; selwakeup(&cpipe->pipe_sel); } - if (cpipe->pipe_state & PIPE_ASYNC) { - if (cpipe->pipe_pgid < 0) - gsignal(-cpipe->pipe_pgid, SIGIO); - else if ((p = pfind(cpipe->pipe_pgid)) != NULL) - psignal(p, SIGIO); - } + if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) + pgsigio(cpipe->pipe_sigio, SIGIO, 0); } /* ARGSUSED */ static int pipe_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct pipe *rpipe = (struct pipe *) fp->f_data; int error = 0; int nread = 0; u_int size; ++rpipe->pipe_busy; while (uio->uio_resid) { /* * normal pipe buffer receive */ if (rpipe->pipe_buffer.cnt > 0) { size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; if (size > rpipe->pipe_buffer.cnt) size = rpipe->pipe_buffer.cnt; if (size > (u_int) uio->uio_resid) size = (u_int) uio->uio_resid; if ((error = pipelock(rpipe,1)) == 0) { error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], size, uio); pipeunlock(rpipe); } if (error) { break; } rpipe->pipe_buffer.out += size; if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) rpipe->pipe_buffer.out = 0; rpipe->pipe_buffer.cnt -= size; nread += size; #ifndef PIPE_NODIRECT /* * Direct copy, bypassing a kernel buffer. 
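 * Illustrative numbers (not taken from the code): suppose the writer
 * mapped 16384 bytes with pipe_map.pos = 0x123 (the user buffer's
 * offset within its first page). A reader asking for 4096 bytes copies
 * them straight from pipe_map.kva + 0x123, leaving cnt = 12288 and
 * pos = 0x1123; only when cnt finally reaches 0 is PIPE_DIRECTW
 * cleared and the waiting writer woken.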
*/ } else if ((size = rpipe->pipe_map.cnt) && (rpipe->pipe_state & PIPE_DIRECTW)) { caddr_t va; if (size > (u_int) uio->uio_resid) size = (u_int) uio->uio_resid; if ((error = pipelock(rpipe,1)) == 0) { va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; error = uiomove(va, size, uio); pipeunlock(rpipe); } if (error) break; nread += size; rpipe->pipe_map.pos += size; rpipe->pipe_map.cnt -= size; if (rpipe->pipe_map.cnt == 0) { rpipe->pipe_state &= ~PIPE_DIRECTW; wakeup(rpipe); } #endif } else { /* * detect EOF condition */ if (rpipe->pipe_state & PIPE_EOF) { /* XXX error = ? */ break; } /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } if (nread > 0) break; if (fp->f_flag & FNONBLOCK) { error = EAGAIN; break; } /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. */ if ((error = pipelock(rpipe,1)) == 0) { if (rpipe->pipe_buffer.cnt == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; } pipeunlock(rpipe); } else { break; } if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } rpipe->pipe_state |= PIPE_WANTR; if (error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) { break; } } } if (error == 0) getnanotime(&rpipe->pipe_atime); --rpipe->pipe_busy; if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); wakeup(rpipe); } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. */ if (rpipe->pipe_buffer.cnt == 0) { if ((error == 0) && (error = pipelock(rpipe,1)) == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; pipeunlock(rpipe); } } /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } } if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) pipeselwakeup(rpipe); return error; } #ifndef PIPE_NODIRECT /* * Map the sending processes' buffer into kernel space and wire it. * This is similar to a physical write operation. */ static int pipe_build_write_buffer(wpipe, uio) struct pipe *wpipe; struct uio *uio; { u_int size; int i; vm_offset_t addr, endaddr, paddr; size = (u_int) uio->uio_iov->iov_len; if (size > wpipe->pipe_buffer.size) size = wpipe->pipe_buffer.size; endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); addr < endaddr; addr += PAGE_SIZE, i+=1) { vm_page_t m; vm_fault_quick( (caddr_t) addr, VM_PROT_READ); paddr = pmap_kextract(addr); if (!paddr) { int j; for(j=0;jpipe_map.ms[j], 1); return EFAULT; } m = PHYS_TO_VM_PAGE(paddr); vm_page_wire(m); wpipe->pipe_map.ms[i] = m; } /* * set up the control block */ wpipe->pipe_map.npages = i; wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; wpipe->pipe_map.cnt = size; /* * and map the buffer */ if (wpipe->pipe_map.kva == 0) { /* * We need to allocate space for an extra page because the * address range might (will) span pages at times. 
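 * For example (illustrative numbers, assuming 4K pages): a 16384-byte
 * user buffer starting at 0x1003 touches trunc_page(0x1003) = 0x1000
 * through round_page(0x1003 + 16384) = 0x6000, i.e. five pages even
 * though 16384 bytes is only four pages' worth -- hence the extra
 * PAGE_SIZE in the kva allocation below.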
*/ wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, wpipe->pipe_buffer.size + PAGE_SIZE); amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; } pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, wpipe->pipe_map.npages); /* * and update the uio data */ uio->uio_iov->iov_len -= size; uio->uio_iov->iov_base += size; if (uio->uio_iov->iov_len == 0) uio->uio_iov++; uio->uio_resid -= size; uio->uio_offset += size; return 0; } /* * unmap and unwire the process buffer */ static void pipe_destroy_write_buffer(wpipe) struct pipe *wpipe; { int i; if (wpipe->pipe_map.kva) { pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); if (amountpipekva > MAXPIPEKVA) { vm_offset_t kva = wpipe->pipe_map.kva; wpipe->pipe_map.kva = 0; kmem_free(kernel_map, kva, wpipe->pipe_buffer.size + PAGE_SIZE); amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; } } for (i=0;ipipe_map.npages;i++) vm_page_unwire(wpipe->pipe_map.ms[i], 1); } /* * In the case of a signal, the writing process might go away. This * code copies the data into the circular buffer so that the source * pages can be freed without loss of data. */ static void pipe_clone_write_buffer(wpipe) struct pipe *wpipe; { int size; int pos; size = wpipe->pipe_map.cnt; pos = wpipe->pipe_map.pos; bcopy((caddr_t) wpipe->pipe_map.kva+pos, (caddr_t) wpipe->pipe_buffer.buffer, size); wpipe->pipe_buffer.in = size; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = size; wpipe->pipe_state &= ~PIPE_DIRECTW; pipe_destroy_write_buffer(wpipe); } /* * This implements the pipe buffer write mechanism. Note that only * a direct write OR a normal pipe write can be pending at any given time. * If there are any characters in the pipe buffer, the direct write will * be deferred until the receiving process grabs all of the bytes from * the pipe buffer. Then the direct mapping write is set-up. */ static int pipe_direct_write(wpipe, uio) struct pipe *wpipe; struct uio *uio; { int error; retry: while (wpipe->pipe_state & PIPE_DIRECTW) { if ( wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } wpipe->pipe_state |= PIPE_WANTW; error = tsleep(wpipe, PRIBIO|PCATCH, "pipdww", 0); if (error) goto error1; if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; goto error1; } } wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ if (wpipe->pipe_buffer.cnt > 0) { if ( wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } wpipe->pipe_state |= PIPE_WANTW; error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwc", 0); if (error) goto error1; if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; goto error1; } goto retry; } wpipe->pipe_state |= PIPE_DIRECTW; error = pipe_build_write_buffer(wpipe, uio); if (error) { wpipe->pipe_state &= ~PIPE_DIRECTW; goto error1; } error = 0; while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { if (wpipe->pipe_state & PIPE_EOF) { pipelock(wpipe, 0); pipe_destroy_write_buffer(wpipe); pipeunlock(wpipe); pipeselwakeup(wpipe); error = EPIPE; goto error1; } if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } pipeselwakeup(wpipe); error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); } pipelock(wpipe,0); if (wpipe->pipe_state & PIPE_DIRECTW) { /* * this bit of trickery substitutes a kernel buffer for * the process that might be going away. 
*/ pipe_clone_write_buffer(wpipe); } else { pipe_destroy_write_buffer(wpipe); } pipeunlock(wpipe); return error; error1: wakeup(wpipe); return error; } #endif static int pipe_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { int error = 0; int orig_resid; struct pipe *wpipe, *rpipe; rpipe = (struct pipe *) fp->f_data; wpipe = rpipe->pipe_peer; /* * detect loss of pipe read side, issue SIGPIPE if lost. */ if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { return EPIPE; } /* * If it is advantageous to resize the pipe buffer, do * so. */ if ((uio->uio_resid > PIPE_SIZE) && (nbigpipe < LIMITBIGPIPES) && (wpipe->pipe_state & PIPE_DIRECTW) == 0 && (wpipe->pipe_buffer.size <= PIPE_SIZE) && (wpipe->pipe_buffer.cnt == 0)) { if (wpipe->pipe_buffer.buffer) { amountpipekva -= wpipe->pipe_buffer.size; kmem_free(kernel_map, (vm_offset_t)wpipe->pipe_buffer.buffer, wpipe->pipe_buffer.size); } #ifndef PIPE_NODIRECT if (wpipe->pipe_map.kva) { amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; kmem_free(kernel_map, wpipe->pipe_map.kva, wpipe->pipe_buffer.size + PAGE_SIZE); } #endif wpipe->pipe_buffer.in = 0; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = 0; wpipe->pipe_buffer.size = BIG_PIPE_SIZE; wpipe->pipe_buffer.buffer = NULL; ++nbigpipe; #ifndef PIPE_NODIRECT wpipe->pipe_map.cnt = 0; wpipe->pipe_map.kva = 0; wpipe->pipe_map.pos = 0; wpipe->pipe_map.npages = 0; #endif } if( wpipe->pipe_buffer.buffer == NULL) { if ((error = pipelock(wpipe,1)) == 0) { pipespace(wpipe); pipeunlock(wpipe); } else { return error; } } ++wpipe->pipe_busy; orig_resid = uio->uio_resid; while (uio->uio_resid) { int space; #ifndef PIPE_NODIRECT /* * If the transfer is large, we can gain performance if * we do process-to-process copies directly. * If the write is non-blocking, we don't use the * direct write mechanism. */ if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { error = pipe_direct_write( wpipe, uio); if (error) { break; } continue; } #endif /* * Pipe buffered writes cannot be coincidental with * direct writes. We wait until the currently executing * direct write is completed before we start filling the * pipe buffer. */ retrywrite: while (wpipe->pipe_state & PIPE_DIRECTW) { if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } error = tsleep(wpipe, PRIBIO|PCATCH, "pipbww", 0); if (error) break; } space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; /* Writes of size <= PIPE_BUF must be atomic. */ /* XXX perhaps they need to be contiguous to be atomic? */ if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) space = 0; if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { /* * This set the maximum transfer as a segment of * the buffer. */ int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in; /* * space is the size left in the buffer */ if (size > space) size = space; /* * now limit it to the size of the uio transfer */ if (size > uio->uio_resid) size = uio->uio_resid; if ((error = pipelock(wpipe,1)) == 0) { /* * It is possible for a direct write to * slip in on us... handle it here... 
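 * (The race being handled: PIPE_DIRECTW was clear when the retrywrite
 * loop above was left, but pipelock() may have slept, and another
 * process writing the same pipe can have started a direct transfer in
 * the meantime -- so the state is re-checked under the pipe lock.)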
*/ if (wpipe->pipe_state & PIPE_DIRECTW) { pipeunlock(wpipe); goto retrywrite; } error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], size, uio); pipeunlock(wpipe); } if (error) break; wpipe->pipe_buffer.in += size; if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size) wpipe->pipe_buffer.in = 0; wpipe->pipe_buffer.cnt += size; } else { /* * If the "read-side" has been blocked, wake it up now. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } /* * don't block on non-blocking I/O */ if (fp->f_flag & FNONBLOCK) { error = EAGAIN; break; } /* * We have no more space and have something to offer, * wake up select/poll. */ pipeselwakeup(wpipe); wpipe->pipe_state |= PIPE_WANTW; if (error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) { break; } /* * If read side wants to go away, we just issue a signal * to ourselves. */ if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; break; } } } --wpipe->pipe_busy; if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); wakeup(wpipe); } else if (wpipe->pipe_buffer.cnt > 0) { /* * If we have put any characters in the buffer, we wake up * the reader. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } } /* * Don't return EPIPE if I/O was successful */ if ((wpipe->pipe_buffer.cnt == 0) && (uio->uio_resid == 0) && (error == EPIPE)) error = 0; if (error == 0) getnanotime(&wpipe->pipe_mtime); /* * We have something to offer, * wake up select/poll. */ if (wpipe->pipe_buffer.cnt) pipeselwakeup(wpipe); return error; } /* * we implement a very minimal set of ioctls for compatibility with sockets. */ int pipe_ioctl(fp, cmd, data, p) struct file *fp; u_long cmd; register caddr_t data; struct proc *p; { register struct pipe *mpipe = (struct pipe *)fp->f_data; switch (cmd) { case FIONBIO: return (0); case FIOASYNC: if (*(int *)data) { mpipe->pipe_state |= PIPE_ASYNC; } else { mpipe->pipe_state &= ~PIPE_ASYNC; } return (0); case FIONREAD: if (mpipe->pipe_state & PIPE_DIRECTW) *(int *)data = mpipe->pipe_map.cnt; else *(int *)data = mpipe->pipe_buffer.cnt; return (0); - case TIOCSPGRP: - mpipe->pipe_pgid = *(int *)data; + case FIOSETOWN: + return (fsetown(*(int *)data, &mpipe->pipe_sigio)); + + case FIOGETOWN: + *(int *)data = fgetown(mpipe->pipe_sigio); return (0); + /* This is deprecated, FIOSETOWN should be used instead. */ + case TIOCSPGRP: + return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); + + /* This is deprecated, FIOGETOWN should be used instead. 
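 * The sign convention, as an illustrative userland sketch (not part of
 * this file): a positive FIOSETOWN argument names a process and a
 * negative one a process group, while the old TIOCSPGRP interface took
 * a plain process group id -- hence the negation in the translations
 * here:
 *
 *	int owner;
 *
 *	owner = pid;			(SIGIO to a single process)
 *	ioctl(fd, FIOSETOWN, &owner);
 *	owner = -pgrp;			(SIGIO to a whole process group)
 *	ioctl(fd, FIOSETOWN, &owner);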
*/ case TIOCGPGRP: - *(int *)data = mpipe->pipe_pgid; + *(int *)data = -fgetown(mpipe->pipe_sigio); return (0); } return (ENOTTY); } int pipe_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { register struct pipe *rpipe = (struct pipe *)fp->f_data; struct pipe *wpipe; int revents = 0; wpipe = rpipe->pipe_peer; if (events & (POLLIN | POLLRDNORM)) if ((rpipe->pipe_state & PIPE_DIRECTW) || (rpipe->pipe_buffer.cnt > 0) || (rpipe->pipe_state & PIPE_EOF)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || ((wpipe->pipe_state & PIPE_DIRECTW) == 0) && (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF) revents |= events & (POLLOUT | POLLWRNORM); if ((rpipe->pipe_state & PIPE_EOF) || (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) revents |= POLLHUP; if (revents == 0) { if (events & (POLLIN | POLLRDNORM)) { selrecord(p, &rpipe->pipe_sel); rpipe->pipe_state |= PIPE_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(p, &wpipe->pipe_sel); wpipe->pipe_state |= PIPE_SEL; } } return (revents); } int pipe_stat(pipe, ub) register struct pipe *pipe; register struct stat *ub; { bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = pipe->pipe_buffer.size; ub->st_size = pipe->pipe_buffer.cnt; ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; ub->st_atimespec = pipe->pipe_atime; ub->st_mtimespec = pipe->pipe_mtime; ub->st_ctimespec = pipe->pipe_ctime; /* * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev, * st_flags, st_gen. * XXX (st_dev, st_ino) should be unique. */ return 0; } /* ARGSUSED */ static int pipe_close(fp, p) struct file *fp; struct proc *p; { struct pipe *cpipe = (struct pipe *)fp->f_data; + funsetown(cpipe->pipe_sigio); pipeclose(cpipe); fp->f_data = NULL; return 0; } /* * shutdown the pipe */ static void pipeclose(cpipe) struct pipe *cpipe; { struct pipe *ppipe; if (cpipe) { pipeselwakeup(cpipe); /* * If the other side is blocked, wake it up saying that * we want to close it down. */ while (cpipe->pipe_busy) { wakeup(cpipe); cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; tsleep(cpipe, PRIBIO, "pipecl", 0); } /* * Disconnect from peer */ if (ppipe = cpipe->pipe_peer) { pipeselwakeup(ppipe); ppipe->pipe_state |= PIPE_EOF; wakeup(ppipe); ppipe->pipe_peer = NULL; } /* * free resources */ if (cpipe->pipe_buffer.buffer) { if (cpipe->pipe_buffer.size > PIPE_SIZE) --nbigpipe; amountpipekva -= cpipe->pipe_buffer.size; kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size); } #ifndef PIPE_NODIRECT if (cpipe->pipe_map.kva) { amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; kmem_free(kernel_map, cpipe->pipe_map.kva, cpipe->pipe_buffer.size + PAGE_SIZE); } #endif zfree(pipe_zone, cpipe); } } Index: head/sys/kern/sys_socket.c =================================================================== --- head/sys/kern/sys_socket.c (revision 41085) +++ head/sys/kern/sys_socket.c (revision 41086) @@ -1,175 +1,182 @@ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93 - * $Id: sys_socket.c,v 1.17 1998/03/28 10:33:07 bde Exp $ + * $Id: sys_socket.c,v 1.18 1998/06/07 17:11:40 dfr Exp $ */ #include #include #include #include #include #include #include /* XXX */ #include #include #include +#include #include #include static int soo_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int soo_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int soo_close __P((struct file *fp, struct proc *p)); struct fileops socketops = { soo_read, soo_write, soo_ioctl, soo_poll, soo_close }; /* ARGSUSED */ static int soo_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct socket *so = (struct socket *)fp->f_data; return so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0); } /* ARGSUSED */ static int soo_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct socket *so = (struct socket *)fp->f_data; return so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0, uio->uio_procp); } int soo_ioctl(fp, cmd, data, p) struct file *fp; u_long cmd; register caddr_t data; struct proc *p; { register struct socket *so = (struct socket *)fp->f_data; switch (cmd) { case FIONBIO: if (*(int *)data) so->so_state |= SS_NBIO; else so->so_state &= ~SS_NBIO; return (0); case FIOASYNC: if (*(int *)data) { so->so_state |= SS_ASYNC; so->so_rcv.sb_flags |= SB_ASYNC; so->so_snd.sb_flags |= SB_ASYNC; } else { so->so_state &= ~SS_ASYNC; so->so_rcv.sb_flags &= ~SB_ASYNC; so->so_snd.sb_flags &= ~SB_ASYNC; } return (0); case FIONREAD: *(int *)data = so->so_rcv.sb_cc; return (0); - case SIOCSPGRP: - so->so_pgid = *(int *)data; + case FIOSETOWN: + return (fsetown(*(int *)data, &so->so_sigio)); + + case FIOGETOWN: + *(int *)data = fgetown(so->so_sigio); return (0); + case SIOCSPGRP: + return (fsetown(-(*(int *)data), &so->so_sigio)); + case SIOCGPGRP: - *(int *)data = so->so_pgid; + *(int *)data = -fgetown(so->so_sigio); return (0); case SIOCATMARK: *(int *)data = (so->so_state&SS_RCVATMARK) != 0; return (0); } /* * Interface/routing/protocol specific ioctls: * interface and routing 
ioctls should have a * different entry since a socket's unnecessary */ if (IOCGROUP(cmd) == 'i') return (ifioctl(so, cmd, data, p)); if (IOCGROUP(cmd) == 'r') return (rtioctl(cmd, data, p)); return ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, 0, p)); } int soo_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { struct socket *so = (struct socket *)fp->f_data; return so->so_proto->pr_usrreqs->pru_sopoll(so, events, cred, p); } int soo_stat(so, ub) register struct socket *so; register struct stat *ub; { bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFSOCK; return ((*so->so_proto->pr_usrreqs->pru_sense)(so, ub)); } /* ARGSUSED */ static int soo_close(fp, p) struct file *fp; struct proc *p; { int error = 0; if (fp->f_data) error = soclose((struct socket *)fp->f_data); fp->f_data = 0; return (error); } Index: head/sys/kern/tty.c =================================================================== --- head/sys/kern/tty.c (revision 41085) +++ head/sys/kern/tty.c (revision 41086) @@ -1,2401 +1,2422 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tty.c 8.8 (Berkeley) 1/21/94 - * $Id: tty.c,v 1.105 1998/07/11 10:41:15 bde Exp $ + * $Id: tty.c,v 1.106 1998/08/19 04:01:00 bde Exp $ */ /*- * TODO: * o Fix races for sending the start char in ttyflush(). * o Handle inter-byte timeout for "MIN > 0, TIME > 0" in ttyselect(). * With luck, there will be MIN chars before select() returns(). * o Handle CLOCAL consistently for ptys. 
Perhaps disallow setting it. * o Don't allow input in TS_ZOMBIE case. It would be visible through * FIONREAD. * o Do the new sio locking stuff here and use it to avoid special * case for EXTPROC? * o Lock PENDIN too? * o Move EXTPROC and/or PENDIN to t_state? * o Wrap most of ttioctl in spltty/splx. * o Implement TIOCNOTTY or remove it from . * o Send STOP if IXOFF is toggled off while TS_TBLOCK is set. * o Don't allow certain termios flags to affect disciplines other * than TTYDISC. Cancel their effects before switch disciplines * and ignore them if they are set while we are in another * discipline. * o Handle c_ispeed = 0 to c_ispeed = c_ospeed conversion here instead * of in drivers and fix drivers that write to tp->t_termios. * o Check for TS_CARR_ON being set while everything is closed and not * waiting for carrier. TS_CARR_ON isn't cleared if nothing is open, * so it would live until the next open even if carrier drops. * o Restore TS_WOPEN since it is useful in pstat. It must be cleared * only when _all_ openers leave open(). */ #include "snp.h" #include "opt_compat.h" #include "opt_uconsole.h" #include #include #include #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #include #endif #include #define TTYDEFCHARS #include #undef TTYDEFCHARS #include #include #include #include #include #include #include #include #include +#include #if NSNP > 0 #include #endif #include #include #include #include MALLOC_DEFINE(M_TTYS, "ttys", "tty data structures"); static int proc_compare __P((struct proc *p1, struct proc *p2)); static int ttnread __P((struct tty *tp)); static void ttyecho __P((int c, struct tty *tp)); static int ttyoutput __P((int c, register struct tty *tp)); static void ttypend __P((struct tty *tp)); static void ttyretype __P((struct tty *tp)); static void ttyrub __P((int c, struct tty *tp)); static void ttyrubo __P((struct tty *tp, int cnt)); static void ttyunblock __P((struct tty *tp)); static int ttywflush __P((struct tty *tp)); /* * Table with character classes and parity. The 8th bit indicates parity, * the 7th bit indicates the character is an alphameric or underscore (for * ALTWERASE), and the low 6 bits indicate delay type. If the low 6 bits * are 0 then the character needs no special processing on output; classes * other than 0 might be translated or (not currently) require delays. */ #define E 0x00 /* Even parity. */ #define O 0x80 /* Odd parity. */ #define PARITY(c) (char_type[c] & O) #define ALPHA 0x40 /* Alpha or underscore. */ #define ISALPHA(c) (char_type[(c) & TTY_CHARMASK] & ALPHA) #define CCLASSMASK 0x3f #define CCLASS(c) (char_type[c] & CCLASSMASK) #define BS BACKSPACE #define CC CONTROL #define CR RETURN #define NA ORDINARY | ALPHA #define NL NEWLINE #define NO ORDINARY #define TB TAB #define VT VTAB static u_char const char_type[] = { E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */ O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */ O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */ O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */ E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */ O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? 
*/ O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */ O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */ E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */ E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */ /* * Meta chars; should be settable per character set; * for now, treat them all as normal characters. */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, }; #undef BS #undef CC #undef CR #undef NA #undef NL #undef NO #undef TB #undef VT /* Macros to clear/set/test flags. */ #define SET(t, f) (t) |= (f) #define CLR(t, f) (t) &= ~(f) #define ISSET(t, f) ((t) & (f)) #undef MAX_INPUT /* XXX wrong in */ #define MAX_INPUT TTYHOG /* XXX limit is usually larger for !ICANON */ /* * Initial open of tty, or (re)entry to standard tty line discipline. */ int ttyopen(device, tp) dev_t device; register struct tty *tp; { int s; s = spltty(); tp->t_dev = device; if (!ISSET(tp->t_state, TS_ISOPEN)) { SET(tp->t_state, TS_ISOPEN); if (ISSET(tp->t_cflag, CLOCAL)) SET(tp->t_state, TS_CONNECTED); bzero(&tp->t_winsize, sizeof(tp->t_winsize)); } ttsetwater(tp); splx(s); return (0); } /* * Handle close() on a tty line: flush and set to initial state, * bumping generation number so that pending read/write calls * can detect recycling of the tty. * XXX our caller should have done `spltty(); l_close(); ttyclose();' * and l_close() should have flushed, but we repeat the spltty() and * the flush in case there are buggy callers. */ int ttyclose(tp) register struct tty *tp; { int s; + funsetown(tp->t_sigio); s = spltty(); if (constty == tp) constty = NULL; ttyflush(tp, FREAD | FWRITE); clist_free_cblocks(&tp->t_canq); clist_free_cblocks(&tp->t_outq); clist_free_cblocks(&tp->t_rawq); #if NSNP > 0 if (ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpdown((struct snoop *)tp->t_sc); #endif tp->t_gen++; tp->t_line = TTYDISC; tp->t_pgrp = NULL; tp->t_session = NULL; tp->t_state = 0; splx(s); return (0); } #define FLUSHQ(q) { \ if ((q)->c_cc) \ ndflush(q, (q)->c_cc); \ } /* Is 'c' a line delimiter ("break" character)? */ #define TTBREAKC(c, lflag) \ ((c) == '\n' || (((c) == cc[VEOF] || \ (c) == cc[VEOL] || ((c) == cc[VEOL2] && lflag & IEXTEN)) && \ (c) != _POSIX_VDISABLE)) /* * Process input of a single character received on a tty. */ int ttyinput(c, tp) register int c; register struct tty *tp; { register tcflag_t iflag, lflag; register cc_t *cc; int i, err; /* * If input is pending take it first. */ lflag = tp->t_lflag; if (ISSET(lflag, PENDIN)) ttypend(tp); /* * Gather stats. */ if (ISSET(lflag, ICANON)) { ++tk_cancc; ++tp->t_cancc; } else { ++tk_rawcc; ++tp->t_rawcc; } ++tk_nin; /* * Block further input iff: * current input > threshold AND input is available to user program * AND input flow control is enabled and not yet invoked. * The 3 is slop for PARMRK. 
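 * (The slop: with PARMRK set, a single error byte is expanded into the
 * three-byte sequence 0377, 0, c on the raw queue, so the high-water
 * check leaves room for one such expansion.)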
*/ iflag = tp->t_iflag; if (tp->t_rawq.c_cc + tp->t_canq.c_cc > tp->t_ihiwat - 3 && (!ISSET(lflag, ICANON) || tp->t_canq.c_cc != 0) && (ISSET(tp->t_cflag, CRTS_IFLOW) || ISSET(iflag, IXOFF)) && !ISSET(tp->t_state, TS_TBLOCK)) ttyblock(tp); /* Handle exceptional conditions (break, parity, framing). */ cc = tp->t_cc; err = (ISSET(c, TTY_ERRORMASK)); if (err) { CLR(c, TTY_ERRORMASK); if (ISSET(err, TTY_BI)) { if (ISSET(iflag, IGNBRK)) return (0); if (ISSET(iflag, BRKINT)) { ttyflush(tp, FREAD | FWRITE); pgsignal(tp->t_pgrp, SIGINT, 1); goto endcase; } if (ISSET(iflag, PARMRK)) goto parmrk; } else if ((ISSET(err, TTY_PE) && ISSET(iflag, INPCK)) || ISSET(err, TTY_FE)) { if (ISSET(iflag, IGNPAR)) return (0); else if (ISSET(iflag, PARMRK)) { parmrk: if (tp->t_rawq.c_cc + tp->t_canq.c_cc > MAX_INPUT - 3) goto input_overflow; (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); (void)putc(0 | TTY_QUOTE, &tp->t_rawq); (void)putc(c | TTY_QUOTE, &tp->t_rawq); goto endcase; } else c = 0; } } if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP)) CLR(c, 0x80); if (!ISSET(lflag, EXTPROC)) { /* * Check for literal nexting very first */ if (ISSET(tp->t_state, TS_LNCH)) { SET(c, TTY_QUOTE); CLR(tp->t_state, TS_LNCH); } /* * Scan for special characters. This code * is really just a big case statement with * non-constant cases. The bottom of the * case statement is labeled ``endcase'', so goto * it after a case match, or similar. */ /* * Control chars which aren't controlled * by ICANON, ISIG, or IXON. */ if (ISSET(lflag, IEXTEN)) { if (CCEQ(cc[VLNEXT], c)) { if (ISSET(lflag, ECHO)) { if (ISSET(lflag, ECHOE)) { (void)ttyoutput('^', tp); (void)ttyoutput('\b', tp); } else ttyecho(c, tp); } SET(tp->t_state, TS_LNCH); goto endcase; } if (CCEQ(cc[VDISCARD], c)) { if (ISSET(lflag, FLUSHO)) CLR(tp->t_lflag, FLUSHO); else { ttyflush(tp, FWRITE); ttyecho(c, tp); if (tp->t_rawq.c_cc + tp->t_canq.c_cc) ttyretype(tp); SET(tp->t_lflag, FLUSHO); } goto startoutput; } } /* * Signals. */ if (ISSET(lflag, ISIG)) { if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD | FWRITE); ttyecho(c, tp); pgsignal(tp->t_pgrp, CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1); goto endcase; } if (CCEQ(cc[VSUSP], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD); ttyecho(c, tp); pgsignal(tp->t_pgrp, SIGTSTP, 1); goto endcase; } } /* * Handle start/stop characters. */ if (ISSET(iflag, IXON)) { if (CCEQ(cc[VSTOP], c)) { if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, 0); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); #endif return (0); } if (!CCEQ(cc[VSTART], c)) return (0); /* * if VSTART == VSTOP then toggle */ goto endcase; } if (CCEQ(cc[VSTART], c)) goto restartoutput; } /* * IGNCR, ICRNL, & INLCR */ if (c == '\r') { if (ISSET(iflag, IGNCR)) return (0); else if (ISSET(iflag, ICRNL)) c = '\n'; } else if (c == '\n' && ISSET(iflag, INLCR)) c = '\r'; } if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) { /* * From here on down canonical mode character * processing takes place. */ /* * erase (^H / ^?) 
*/ if (CCEQ(cc[VERASE], c)) { if (tp->t_rawq.c_cc) ttyrub(unputc(&tp->t_rawq), tp); goto endcase; } /* * kill (^U) */ if (CCEQ(cc[VKILL], c)) { if (ISSET(lflag, ECHOKE) && tp->t_rawq.c_cc == tp->t_rocount && !ISSET(lflag, ECHOPRT)) while (tp->t_rawq.c_cc) ttyrub(unputc(&tp->t_rawq), tp); else { ttyecho(c, tp); if (ISSET(lflag, ECHOK) || ISSET(lflag, ECHOKE)) ttyecho('\n', tp); FLUSHQ(&tp->t_rawq); tp->t_rocount = 0; } CLR(tp->t_state, TS_LOCAL); goto endcase; } /* * word erase (^W) */ if (CCEQ(cc[VWERASE], c) && ISSET(lflag, IEXTEN)) { int ctype; /* * erase whitespace */ while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t') ttyrub(c, tp); if (c == -1) goto endcase; /* * erase last char of word and remember the * next chars type (for ALTWERASE) */ ttyrub(c, tp); c = unputc(&tp->t_rawq); if (c == -1) goto endcase; if (c == ' ' || c == '\t') { (void)putc(c, &tp->t_rawq); goto endcase; } ctype = ISALPHA(c); /* * erase rest of word */ do { ttyrub(c, tp); c = unputc(&tp->t_rawq); if (c == -1) goto endcase; } while (c != ' ' && c != '\t' && (!ISSET(lflag, ALTWERASE) || ISALPHA(c) == ctype)); (void)putc(c, &tp->t_rawq); goto endcase; } /* * reprint line (^R) */ if (CCEQ(cc[VREPRINT], c) && ISSET(lflag, IEXTEN)) { ttyretype(tp); goto endcase; } /* * ^T - kernel info and generate SIGINFO */ if (CCEQ(cc[VSTATUS], c) && ISSET(lflag, IEXTEN)) { if (ISSET(lflag, ISIG)) pgsignal(tp->t_pgrp, SIGINFO, 1); if (!ISSET(lflag, NOKERNINFO)) ttyinfo(tp); goto endcase; } } /* * Check for input buffer overflow */ if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= MAX_INPUT) { input_overflow: if (ISSET(iflag, IMAXBEL)) { if (tp->t_outq.c_cc < tp->t_ohiwat) (void)ttyoutput(CTRL('g'), tp); } goto endcase; } if ( c == 0377 && ISSET(iflag, PARMRK) && !ISSET(iflag, ISTRIP) && ISSET(iflag, IGNBRK|IGNPAR) != (IGNBRK|IGNPAR)) (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); /* * Put data char in q for user and * wakeup on seeing a line delimiter. */ if (putc(c, &tp->t_rawq) >= 0) { if (!ISSET(lflag, ICANON)) { ttwakeup(tp); ttyecho(c, tp); goto endcase; } if (TTBREAKC(c, lflag)) { tp->t_rocount = 0; catq(&tp->t_rawq, &tp->t_canq); ttwakeup(tp); } else if (tp->t_rocount++ == 0) tp->t_rocol = tp->t_column; if (ISSET(tp->t_state, TS_ERASE)) { /* * end of prterase \.../ */ CLR(tp->t_state, TS_ERASE); (void)ttyoutput('/', tp); } i = tp->t_column; ttyecho(c, tp); if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) { /* * Place the cursor over the '^' of the ^D. */ i = imin(2, tp->t_column - i); while (i > 0) { (void)ttyoutput('\b', tp); i--; } } } endcase: /* * IXANY means allow any character to restart output. */ if (ISSET(tp->t_state, TS_TTSTOP) && !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP]) return (0); restartoutput: CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); startoutput: return (ttstart(tp)); } /* * Output a single character on a tty, doing output processing * as needed (expanding tabs, newline processing, etc.). * Returns < 0 if succeeds, otherwise returns char to resend. * Must be recursive. */ static int ttyoutput(c, tp) register int c; register struct tty *tp; { register tcflag_t oflag; register int col, s; oflag = tp->t_oflag; if (!ISSET(oflag, OPOST)) { if (ISSET(tp->t_lflag, FLUSHO)) return (-1); if (putc(c, &tp->t_outq)) return (c); tk_nout++; tp->t_outcc++; return (-1); } /* * Do tab expansion if OXTABS is set. Special case if we external * processing, we don't do the tab expansion because we'll probably * get it wrong. If tab expansion needs to be done, let it happen * externally. 
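 * (Worked example of the expansion below: at t_column = 11,
 * c = 8 - (11 & 7) = 5, so five spaces are queued and the column
 * advances to the next tab stop, 16.)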
*/ CLR(c, ~TTY_CHARMASK); if (c == '\t' && ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) { c = 8 - (tp->t_column & 7); if (!ISSET(tp->t_lflag, FLUSHO)) { s = spltty(); /* Don't interrupt tabs. */ c -= b_to_q(" ", c, &tp->t_outq); tk_nout += c; tp->t_outcc += c; splx(s); } tp->t_column += c; return (c ? -1 : '\t'); } if (c == CEOT && ISSET(oflag, ONOEOT)) return (-1); /* * Newline translation: if ONLCR is set, * translate newline into "\r\n". */ if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) { tk_nout++; tp->t_outcc++; if (putc('\r', &tp->t_outq)) return (c); } tk_nout++; tp->t_outcc++; if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq)) return (c); col = tp->t_column; switch (CCLASS(c)) { case BACKSPACE: if (col > 0) --col; break; case CONTROL: break; case NEWLINE: case RETURN: col = 0; break; case ORDINARY: ++col; break; case TAB: col = (col + 8) & ~7; break; } tp->t_column = col; return (-1); } /* * Ioctls for all tty devices. Called after line-discipline specific ioctl * has been called to do discipline-specific functions and/or reject any * of these ioctl commands. */ /* ARGSUSED */ int ttioctl(tp, cmd, data, flag) register struct tty *tp; u_long cmd; int flag; void *data; { register struct proc *p; int s, error; p = curproc; /* XXX */ /* If the ioctl involves modification, hang if in the background. */ switch (cmd) { case TIOCFLUSH: case TIOCSETA: case TIOCSETD: case TIOCSETAF: case TIOCSETAW: #ifdef notdef case TIOCSPGRP: #endif case TIOCSTAT: case TIOCSTI: case TIOCSWINSZ: #if defined(COMPAT_43) || defined(COMPAT_SUNOS) case TIOCLBIC: case TIOCLBIS: case TIOCLSET: case TIOCSETC: case OTIOCSETD: case TIOCSETN: case TIOCSETP: case TIOCSLTC: #endif while (isbackground(p, tp) && (p->p_flag & P_PPWAIT) == 0 && (p->p_sigignore & sigmask(SIGTTOU)) == 0 && (p->p_sigmask & sigmask(SIGTTOU)) == 0) { if (p->p_pgrp->pg_jobc == 0) return (EIO); pgsignal(p->p_pgrp, SIGTTOU, 1); error = ttysleep(tp, &lbolt, TTOPRI | PCATCH, "ttybg1", 0); if (error) return (error); } break; } switch (cmd) { /* Process the ioctl. */ case FIOASYNC: /* set/clear async i/o */ s = spltty(); if (*(int *)data) SET(tp->t_state, TS_ASYNC); else CLR(tp->t_state, TS_ASYNC); splx(s); break; case FIONBIO: /* set/clear non-blocking i/o */ break; /* XXX: delete. 
*/ case FIONREAD: /* get # bytes to read */ s = spltty(); *(int *)data = ttnread(tp); splx(s); break; + + case FIOSETOWN: + /* + * Policy -- Don't allow FIOSETOWN on someone else's + * controlling tty + */ + if (tp->t_session != NULL && !isctty(p, tp)) + return (ENOTTY); + + error = fsetown(*(int *)data, &tp->t_sigio); + if (error) + return (error); + break; + case FIOGETOWN: + if (tp->t_session != NULL && !isctty(p, tp)) + return (ENOTTY); + *(int *)data = fgetown(tp->t_sigio); + break; + case TIOCEXCL: /* set exclusive use of tty */ s = spltty(); SET(tp->t_state, TS_XCLUDE); splx(s); break; case TIOCFLUSH: { /* flush buffers */ register int flags = *(int *)data; if (flags == 0) flags = FREAD | FWRITE; else flags &= FREAD | FWRITE; ttyflush(tp, flags); break; } case TIOCCONS: /* become virtual console */ if (*(int *)data) { if (constty && constty != tp && ISSET(constty->t_state, TS_CONNECTED)) return (EBUSY); #ifndef UCONSOLE if (error = suser(p->p_ucred, &p->p_acflag)) return (error); #endif constty = tp; } else if (tp == constty) constty = NULL; break; case TIOCDRAIN: /* wait till output drained */ error = ttywait(tp); if (error) return (error); break; case TIOCGETA: { /* get termios struct */ struct termios *t = (struct termios *)data; bcopy(&tp->t_termios, t, sizeof(struct termios)); break; } case TIOCGETD: /* get line discipline */ *(int *)data = tp->t_line; break; case TIOCGWINSZ: /* get window size */ *(struct winsize *)data = tp->t_winsize; break; case TIOCGPGRP: /* get pgrp of tty */ if (!isctty(p, tp)) return (ENOTTY); *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; break; #ifdef TIOCHPCL case TIOCHPCL: /* hang up on last close */ s = spltty(); SET(tp->t_cflag, HUPCL); splx(s); break; #endif case TIOCNXCL: /* reset exclusive use of tty */ s = spltty(); CLR(tp->t_state, TS_XCLUDE); splx(s); break; case TIOCOUTQ: /* output queue size */ *(int *)data = tp->t_outq.c_cc; break; case TIOCSETA: /* set termios struct */ case TIOCSETAW: /* drain output, set */ case TIOCSETAF: { /* drn out, fls in, set */ register struct termios *t = (struct termios *)data; if (t->c_ispeed < 0 || t->c_ospeed < 0) return (EINVAL); s = spltty(); if (cmd == TIOCSETAW || cmd == TIOCSETAF) { error = ttywait(tp); if (error) { splx(s); return (error); } if (cmd == TIOCSETAF) ttyflush(tp, FREAD); } if (!ISSET(t->c_cflag, CIGNORE)) { /* * Set device hardware. */ if (tp->t_param && (error = (*tp->t_param)(tp, t))) { splx(s); return (error); } if (ISSET(t->c_cflag, CLOCAL) && !ISSET(tp->t_cflag, CLOCAL)) { /* * XXX disconnections would be too hard to * get rid of without this kludge. The only * way to get rid of controlling terminals * is to exit from the session leader. */ CLR(tp->t_state, TS_ZOMBIE); wakeup(TSA_CARR_ON(tp)); ttwakeup(tp); ttwwakeup(tp); } if ((ISSET(tp->t_state, TS_CARR_ON) || ISSET(t->c_cflag, CLOCAL)) && !ISSET(tp->t_state, TS_ZOMBIE)) SET(tp->t_state, TS_CONNECTED); else CLR(tp->t_state, TS_CONNECTED); tp->t_cflag = t->c_cflag; tp->t_ispeed = t->c_ispeed; tp->t_ospeed = t->c_ospeed; ttsetwater(tp); } if (ISSET(t->c_lflag, ICANON) != ISSET(tp->t_lflag, ICANON) && cmd != TIOCSETAF) { if (ISSET(t->c_lflag, ICANON)) SET(tp->t_lflag, PENDIN); else { /* * XXX we really shouldn't allow toggling * ICANON while we're in a non-termios line * discipline. Now we have to worry about * panicing for a null queue. 
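 * (Concretely: a non-termios discipline may run with t_rawq/t_canq
 * unreserved, and catq() or putc() on a queue with no reserved cblocks
 * is exactly the "putc to a clist with no reserved cblocks" failure
 * mentioned in ttyflush(); the c_cbreserved > 0 tests below guard
 * against that.)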
*/ if (tp->t_canq.c_cbreserved > 0 && tp->t_rawq.c_cbreserved > 0) { catq(&tp->t_rawq, &tp->t_canq); /* * XXX the queue limits may be * different, so the old queue * swapping method no longer works. */ catq(&tp->t_canq, &tp->t_rawq); } CLR(tp->t_lflag, PENDIN); } ttwakeup(tp); } tp->t_iflag = t->c_iflag; tp->t_oflag = t->c_oflag; /* * Make the EXTPROC bit read only. */ if (ISSET(tp->t_lflag, EXTPROC)) SET(t->c_lflag, EXTPROC); else CLR(t->c_lflag, EXTPROC); tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN); if (t->c_cc[VMIN] != tp->t_cc[VMIN] || t->c_cc[VTIME] != tp->t_cc[VTIME]) ttwakeup(tp); bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc)); splx(s); break; } case TIOCSETD: { /* set line discipline */ register int t = *(int *)data; dev_t device = tp->t_dev; if ((u_int)t >= nlinesw) return (ENXIO); if (t != tp->t_line) { s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); error = (*linesw[t].l_open)(device, tp); if (error) { (void)(*linesw[tp->t_line].l_open)(device, tp); splx(s); return (error); } tp->t_line = t; splx(s); } break; } case TIOCSTART: /* start output, like ^Q */ s = spltty(); if (ISSET(tp->t_state, TS_TTSTOP) || ISSET(tp->t_lflag, FLUSHO)) { CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); ttstart(tp); } splx(s); break; case TIOCSTI: /* simulate terminal input */ if (p->p_ucred->cr_uid && (flag & FREAD) == 0) return (EPERM); if (p->p_ucred->cr_uid && !isctty(p, tp)) return (EACCES); s = spltty(); (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp); splx(s); break; case TIOCSTOP: /* stop output, like ^S */ s = spltty(); if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, 0); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); #endif } splx(s); break; case TIOCSCTTY: /* become controlling tty */ /* Session ctty vnode pointer set in vnode layer. */ if (!SESS_LEADER(p) || ((p->p_session->s_ttyvp || tp->t_session) && (tp->t_session != p->p_session))) return (EPERM); tp->t_session = p->p_session; tp->t_pgrp = p->p_pgrp; p->p_session->s_ttyp = tp; p->p_flag |= P_CONTROLT; break; case TIOCSPGRP: { /* set pgrp of tty */ register struct pgrp *pgrp = pgfind(*(int *)data); if (!isctty(p, tp)) return (ENOTTY); else if (pgrp == NULL || pgrp->pg_session != p->p_session) return (EPERM); tp->t_pgrp = pgrp; break; } case TIOCSTAT: /* simulate control-T */ s = spltty(); ttyinfo(tp); splx(s); break; case TIOCSWINSZ: /* set window size */ if (bcmp((caddr_t)&tp->t_winsize, data, sizeof (struct winsize))) { tp->t_winsize = *(struct winsize *)data; pgsignal(tp->t_pgrp, SIGWINCH, 1); } break; case TIOCSDRAINWAIT: error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); tp->t_timeout = *(int *)data * hz; wakeup(TSA_OCOMPLETE(tp)); wakeup(TSA_OLOWAT(tp)); break; case TIOCGDRAINWAIT: *(int *)data = tp->t_timeout / hz; break; default: #if defined(COMPAT_43) || defined(COMPAT_SUNOS) return (ttcompat(tp, cmd, data, flag)); #else return (ENOIOCTL); #endif } return (0); } int ttypoll(tp, events, p) struct tty *tp; int events; struct proc *p; { int s; int revents = 0; if (tp == NULL) /* XXX used to return ENXIO, but that means true! 
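 * (A poll routine returns a bitmask of ready events rather than an
 * errno, so a positive error number like ENXIO would be misread as
 * "these events are ready"; a missing tty is therefore reported as
 * readable, writable and hung up instead.)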
*/ return ((events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)) | POLLHUP); s = spltty(); if (events & (POLLIN | POLLRDNORM)) if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &tp->t_rsel); if (events & (POLLOUT | POLLWRNORM)) if ((tp->t_outq.c_cc <= tp->t_olowat && ISSET(tp->t_state, TS_CONNECTED)) || ISSET(tp->t_state, TS_ZOMBIE)) revents |= events & (POLLOUT | POLLWRNORM); else selrecord(p, &tp->t_wsel); splx(s); return (revents); } /* * This is a wrapper for compatibility with the select vector used by * cdevsw. It relies on a proper xxxdevtotty routine. */ int ttpoll(dev, events, p) dev_t dev; int events; struct proc *p; { return ttypoll((*cdevsw[major(dev)]->d_devtotty)(dev), events, p); } /* * Must be called at spltty(). */ static int ttnread(tp) struct tty *tp; { int nread; if (ISSET(tp->t_lflag, PENDIN)) ttypend(tp); nread = tp->t_canq.c_cc; if (!ISSET(tp->t_lflag, ICANON)) { nread += tp->t_rawq.c_cc; if (nread < tp->t_cc[VMIN] && tp->t_cc[VTIME] == 0) nread = 0; } return (nread); } /* * Wait for output to drain. */ int ttywait(tp) register struct tty *tp; { int error, s; error = 0; s = spltty(); while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED) && tp->t_oproc) { (*tp->t_oproc)(tp); if ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED)) { SET(tp->t_state, TS_SO_OCOMPLETE); error = ttysleep(tp, TSA_OCOMPLETE(tp), TTOPRI | PCATCH, "ttywai", tp->t_timeout); if (error) { if (error == EWOULDBLOCK) error = EIO; break; } } else break; } if (!error && (tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY))) error = EIO; splx(s); return (error); } /* * Flush if successfully wait. */ static int ttywflush(tp) struct tty *tp; { int error; if ((error = ttywait(tp)) == 0) ttyflush(tp, FREAD); return (error); } /* * Flush tty read and/or write queues, notifying anyone waiting. */ void ttyflush(tp, rw) register struct tty *tp; int rw; { register int s; s = spltty(); #if 0 again: #endif if (rw & FWRITE) { FLUSHQ(&tp->t_outq); CLR(tp->t_state, TS_TTSTOP); } #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, rw); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, rw); #endif if (rw & FREAD) { FLUSHQ(&tp->t_canq); FLUSHQ(&tp->t_rawq); CLR(tp->t_lflag, PENDIN); tp->t_rocount = 0; tp->t_rocol = 0; CLR(tp->t_state, TS_LOCAL); ttwakeup(tp); if (ISSET(tp->t_state, TS_TBLOCK)) { if (rw & FWRITE) FLUSHQ(&tp->t_outq); ttyunblock(tp); /* * Don't let leave any state that might clobber the * next line discipline (although we should do more * to send the START char). Not clearing the state * may have caused the "putc to a clist with no * reserved cblocks" panic/printf. */ CLR(tp->t_state, TS_TBLOCK); #if 0 /* forget it, sleeping isn't always safe and we don't know when it is */ if (ISSET(tp->t_iflag, IXOFF)) { /* * XXX wait a bit in the hope that the stop * character (if any) will go out. Waiting * isn't good since it allows races. This * will be fixed when the stop character is * put in a special queue. Don't bother with * the checks in ttywait() since the timeout * will save us. */ SET(tp->t_state, TS_SO_OCOMPLETE); ttysleep(tp, TSA_OCOMPLETE(tp), TTOPRI, "ttyfls", hz / 10); /* * Don't try sending the stop character again. */ CLR(tp->t_state, TS_TBLOCK); goto again; } #endif } } if (rw & FWRITE) { FLUSHQ(&tp->t_outq); ttwwakeup(tp); } splx(s); } /* * Copy in the default termios characters. 
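 * As an illustrative fragment (hypothetical driver open routine, not
 * part of this file), a driver typically seeds the termios defaults on
 * first open before handing the tty to the line discipline:
 *
 *	tp->t_oflag = TTYDEF_OFLAG;
 *	tp->t_iflag = TTYDEF_IFLAG;
 *	tp->t_lflag = TTYDEF_LFLAG;
 *	tp->t_cflag = TTYDEF_CFLAG;
 *	tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
 *	termioschars(&tp->t_termios);
 *	ttsetwater(tp);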
*/ void termioschars(t) struct termios *t; { bcopy(ttydefchars, t->c_cc, sizeof t->c_cc); } /* * Old interface. */ void ttychars(tp) struct tty *tp; { termioschars(&tp->t_termios); } /* * Handle input high water. Send stop character for the IXOFF case. Turn * on our input flow control bit and propagate the changes to the driver. * XXX the stop character should be put in a special high priority queue. */ void ttyblock(tp) struct tty *tp; { SET(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTOP] != _POSIX_VDISABLE && putc(tp->t_cc[VSTOP], &tp->t_outq) != 0) CLR(tp->t_state, TS_TBLOCK); /* try again later */ ttstart(tp); } /* * Handle input low water. Send start character for the IXOFF case. Turn * off our input flow control bit and propagate the changes to the driver. * XXX the start character should be put in a special high priority queue. */ static void ttyunblock(tp) struct tty *tp; { CLR(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTART] != _POSIX_VDISABLE && putc(tp->t_cc[VSTART], &tp->t_outq) != 0) SET(tp->t_state, TS_TBLOCK); /* try again later */ ttstart(tp); } #ifdef notyet /* Not used by any current (i386) drivers. */ /* * Restart after an inter-char delay. */ void ttrstrt(tp_arg) void *tp_arg; { struct tty *tp; int s; #ifdef DIAGNOSTIC if (tp_arg == NULL) panic("ttrstrt"); #endif tp = tp_arg; s = spltty(); CLR(tp->t_state, TS_TIMEOUT); ttstart(tp); splx(s); } #endif int ttstart(tp) struct tty *tp; { if (tp->t_oproc != NULL) /* XXX: Kludge for pty. */ (*tp->t_oproc)(tp); return (0); } /* * "close" a line discipline */ int ttylclose(tp, flag) struct tty *tp; int flag; { if (flag & FNONBLOCK || ttywflush(tp)) ttyflush(tp, FREAD | FWRITE); return (0); } /* * Handle modem control transition on a tty. * Flag indicates new state of carrier. * Returns 0 if the line should be turned off, otherwise 1. */ int ttymodem(tp, flag) register struct tty *tp; int flag; { if (ISSET(tp->t_state, TS_CARR_ON) && ISSET(tp->t_cflag, MDMBUF)) { /* * MDMBUF: do flow control according to carrier flag * XXX TS_CAR_OFLOW doesn't do anything yet. TS_TTSTOP * works if IXON and IXANY are clear. */ if (flag) { CLR(tp->t_state, TS_CAR_OFLOW); CLR(tp->t_state, TS_TTSTOP); ttstart(tp); } else if (!ISSET(tp->t_state, TS_CAR_OFLOW)) { SET(tp->t_state, TS_CAR_OFLOW); SET(tp->t_state, TS_TTSTOP); #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, 0); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); #endif } } else if (flag == 0) { /* * Lost carrier. */ CLR(tp->t_state, TS_CARR_ON); if (ISSET(tp->t_state, TS_ISOPEN) && !ISSET(tp->t_cflag, CLOCAL)) { SET(tp->t_state, TS_ZOMBIE); CLR(tp->t_state, TS_CONNECTED); if (tp->t_session && tp->t_session->s_leader) psignal(tp->t_session->s_leader, SIGHUP); ttyflush(tp, FREAD | FWRITE); return (0); } } else { /* * Carrier now on. */ SET(tp->t_state, TS_CARR_ON); if (!ISSET(tp->t_state, TS_ZOMBIE)) SET(tp->t_state, TS_CONNECTED); wakeup(TSA_CARR_ON(tp)); ttwakeup(tp); ttwwakeup(tp); } return (1); } /* * Reinput pending characters after state switch * call at spltty(). */ static void ttypend(tp) register struct tty *tp; { struct clist tq; register int c; CLR(tp->t_lflag, PENDIN); SET(tp->t_state, TS_TYPEN); /* * XXX this assumes too much about clist internals. It may even * fail if the cblock slush pool is empty. We can't allocate more * cblocks here because we are called from an interrupt handler * and clist_alloc_cblocks() can wait. 
*/ tq = tp->t_rawq; bzero(&tp->t_rawq, sizeof tp->t_rawq); tp->t_rawq.c_cbmax = tq.c_cbmax; tp->t_rawq.c_cbreserved = tq.c_cbreserved; while ((c = getc(&tq)) >= 0) ttyinput(c, tp); CLR(tp->t_state, TS_TYPEN); } /* * Process a read call on a tty device. */ int ttread(tp, uio, flag) register struct tty *tp; struct uio *uio; int flag; { register struct clist *qp; register int c; register tcflag_t lflag; register cc_t *cc = tp->t_cc; register struct proc *p = curproc; int s, first, error = 0; int has_stime = 0, last_cc = 0; long slp = 0; /* XXX this should be renamed `timo'. */ loop: s = spltty(); lflag = tp->t_lflag; /* * take pending input first */ if (ISSET(lflag, PENDIN)) { ttypend(tp); splx(s); /* reduce latency */ s = spltty(); lflag = tp->t_lflag; /* XXX ttypend() clobbers it */ } /* * Hang process if it's in the background. */ if (isbackground(p, tp)) { splx(s); if ((p->p_sigignore & sigmask(SIGTTIN)) || (p->p_sigmask & sigmask(SIGTTIN)) || p->p_flag & P_PPWAIT || p->p_pgrp->pg_jobc == 0) return (EIO); pgsignal(p->p_pgrp, SIGTTIN, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg2", 0); if (error) return (error); goto loop; } if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); return (0); /* EOF */ } /* * If canonical, use the canonical queue, * else use the raw queue. * * (should get rid of clists...) */ qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq; if (flag & IO_NDELAY) { if (qp->c_cc > 0) goto read; if (!ISSET(lflag, ICANON) && cc[VMIN] == 0) { splx(s); return (0); } splx(s); return (EWOULDBLOCK); } if (!ISSET(lflag, ICANON)) { int m = cc[VMIN]; long t = cc[VTIME]; struct timeval stime, timecopy; /* * Check each of the four combinations. * (m > 0 && t == 0) is the normal read case. * It should be fairly efficient, so we check that and its * companion case (m == 0 && t == 0) first. * For the other two cases, we compute the target sleep time * into slp. */ if (t == 0) { if (qp->c_cc < m) goto sleep; if (qp->c_cc > 0) goto read; /* m, t and qp->c_cc are all 0. 0 is enough input. */ splx(s); return (0); } t *= 100000; /* time in us */ #define diff(t1, t2) (((t1).tv_sec - (t2).tv_sec) * 1000000 + \ ((t1).tv_usec - (t2).tv_usec)) if (m > 0) { if (qp->c_cc <= 0) goto sleep; if (qp->c_cc >= m) goto read; getmicrotime(&timecopy); if (!has_stime) { /* first character, start timer */ has_stime = 1; stime = timecopy; slp = t; } else if (qp->c_cc > last_cc) { /* got a character, restart timer */ stime = timecopy; slp = t; } else { /* nothing, check expiration */ slp = t - diff(timecopy, stime); if (slp <= 0) goto read; } last_cc = qp->c_cc; } else { /* m == 0 */ if (qp->c_cc > 0) goto read; getmicrotime(&timecopy); if (!has_stime) { has_stime = 1; stime = timecopy; slp = t; } else { slp = t - diff(timecopy, stime); if (slp <= 0) { /* Timed out, but 0 is enough input. */ splx(s); return (0); } } } #undef diff /* * Rounding down may make us wake up just short * of the target, so we round up. * The formula is ceiling(slp * hz/1000000). * 32-bit arithmetic is enough for hz < 169. * XXX see tvtohz() for how to avoid overflow if hz * is large (divide by `tick' and/or arrange to * use tvtohz() if hz is large). */ slp = (long) (((u_long)slp * hz) + 999999) / 1000000; goto sleep; } if (qp->c_cc <= 0) { sleep: /* * There is no input, or not enough input and we can block. */ error = ttysleep(tp, TSA_HUP_OR_INPUT(tp), TTIPRI | PCATCH, ISSET(tp->t_state, TS_CONNECTED) ? 
"ttyin" : "ttyhup", (int)slp); splx(s); if (error == EWOULDBLOCK) error = 0; else if (error) return (error); /* * XXX what happens if another process eats some input * while we are asleep (not just here)? It would be * safest to detect changes and reset our state variables * (has_stime and last_cc). */ slp = 0; goto loop; } read: splx(s); /* * Input present, check for input mapping and processing. */ first = 1; if (ISSET(lflag, ICANON | ISIG)) goto slowcase; for (;;) { char ibuf[IBUFSIZ]; int icc; icc = imin(uio->uio_resid, IBUFSIZ); icc = q_to_b(qp, ibuf, icc); if (icc <= 0) { if (first) goto loop; break; } error = uiomove(ibuf, icc, uio); /* * XXX if there was an error then we should ungetc() the * unmoved chars and reduce icc here. */ #if NSNP > 0 if (ISSET(tp->t_lflag, ECHO) && ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpin((struct snoop *)tp->t_sc, ibuf, icc); #endif if (error) break; if (uio->uio_resid == 0) break; first = 0; } goto out; slowcase: for (;;) { c = getc(qp); if (c < 0) { if (first) goto loop; break; } /* * delayed suspend (^Y) */ if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, IEXTEN | ISIG) == (IEXTEN | ISIG)) { pgsignal(tp->t_pgrp, SIGTSTP, 1); if (first) { error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg3", 0); if (error) break; goto loop; } break; } /* * Interpret EOF only in canonical mode. */ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON)) break; /* * Give user character. */ error = ureadc(c, uio); if (error) /* XXX should ungetc(c, qp). */ break; #if NSNP > 0 /* * Only snoop directly on input in echo mode. Non-echoed * input will be snooped later iff the application echoes it. */ if (ISSET(tp->t_lflag, ECHO) && ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpinc((struct snoop *)tp->t_sc, (char)c); #endif if (uio->uio_resid == 0) break; /* * In canonical mode check for a "break character" * marking the end of a "line of input". */ if (ISSET(lflag, ICANON) && TTBREAKC(c, lflag)) break; first = 0; } out: /* * Look to unblock input now that (presumably) * the input queue has gone down. */ s = spltty(); if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc + tp->t_canq.c_cc <= tp->t_ilowat) ttyunblock(tp); splx(s); return (error); } /* * Check the output queue on tp for space for a kernel message (from uprintf * or tprintf). Allow some space over the normal hiwater mark so we don't * lose messages due to normal flow control, but don't let the tty run amok. * Sleeps here are not interruptible, but we return prematurely if new signals * arrive. */ int ttycheckoutq(tp, wait) register struct tty *tp; int wait; { int hiwat, s, oldsig; hiwat = tp->t_ohiwat; s = spltty(); oldsig = wait ? curproc->p_siglist : 0; if (tp->t_outq.c_cc > hiwat + OBUFSIZ + 100) while (tp->t_outq.c_cc > hiwat) { ttstart(tp); if (tp->t_outq.c_cc <= hiwat) break; if (wait == 0 || curproc->p_siglist != oldsig) { splx(s); return (0); } SET(tp->t_state, TS_SO_OLOWAT); tsleep(TSA_OLOWAT(tp), PZERO - 1, "ttoutq", hz); } splx(s); return (1); } /* * Process a write call on a tty device. 
*/ int ttwrite(tp, uio, flag) register struct tty *tp; register struct uio *uio; int flag; { register char *cp = NULL; register int cc, ce; register struct proc *p; int i, hiwat, cnt, error, s; char obuf[OBUFSIZ]; hiwat = tp->t_ohiwat; cnt = uio->uio_resid; error = 0; cc = 0; loop: s = spltty(); if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); if (uio->uio_resid == cnt) error = EIO; goto out; } if (!ISSET(tp->t_state, TS_CONNECTED)) { if (flag & IO_NDELAY) { splx(s); error = EWOULDBLOCK; goto out; } error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, "ttydcd", 0); splx(s); if (error) goto out; goto loop; } splx(s); /* * Hang the process if it's in the background. */ p = curproc; if (isbackground(p, tp) && ISSET(tp->t_lflag, TOSTOP) && (p->p_flag & P_PPWAIT) == 0 && (p->p_sigignore & sigmask(SIGTTOU)) == 0 && (p->p_sigmask & sigmask(SIGTTOU)) == 0) { if (p->p_pgrp->pg_jobc == 0) { error = EIO; goto out; } pgsignal(p->p_pgrp, SIGTTOU, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg4", 0); if (error) goto out; goto loop; } /* * Process the user's data in at most OBUFSIZ chunks. Perform any * output translation. Keep track of high water mark, sleep on * overflow awaiting device aid in acquiring new space. */ while (uio->uio_resid > 0 || cc > 0) { if (ISSET(tp->t_lflag, FLUSHO)) { uio->uio_resid = 0; return (0); } if (tp->t_outq.c_cc > hiwat) goto ovhiwat; /* * Grab a hunk of data from the user, unless we have some * leftover from last time. */ if (cc == 0) { cc = imin(uio->uio_resid, OBUFSIZ); cp = obuf; error = uiomove(cp, cc, uio); if (error) { cc = 0; break; } #if NSNP > 0 if (ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpin((struct snoop *)tp->t_sc, cp, cc); #endif } /* * If nothing fancy need be done, grab those characters we * can handle without any of ttyoutput's processing and * just transfer them to the output q. For those chars * which require special processing (as indicated by the * bits in char_type), call ttyoutput. After processing * a hunk of data, look for FLUSHO so ^O's will take effect * immediately. */ while (cc > 0) { if (!ISSET(tp->t_oflag, OPOST)) ce = cc; else { ce = cc - scanc((u_int)cc, (u_char *)cp, char_type, CCLASSMASK); /* * If ce is zero, then we're processing * a special character through ttyoutput. */ if (ce == 0) { tp->t_rocount = 0; if (ttyoutput(*cp, tp) >= 0) { /* No Clists, wait a bit. */ ttstart(tp); if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; } error = ttysleep(tp, &lbolt, TTOPRI|PCATCH, "ttybf1", 0); if (error) goto out; goto loop; } cp++; cc--; if (ISSET(tp->t_lflag, FLUSHO) || tp->t_outq.c_cc > hiwat) goto ovhiwat; continue; } } /* * A bunch of normal characters have been found. * Transfer them en masse to the output queue and * continue processing at the top of the loop. * If there are any further characters in this * <= OBUFSIZ chunk, the first should be a character * requiring special handling by ttyoutput. */ tp->t_rocount = 0; i = b_to_q(cp, ce, &tp->t_outq); ce -= i; tp->t_column += ce; cp += ce, cc -= ce, tk_nout += ce; tp->t_outcc += ce; if (i > 0) { /* No Clists, wait a bit. 
*/ ttstart(tp); if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; } error = ttysleep(tp, &lbolt, TTOPRI | PCATCH, "ttybf2", 0); if (error) goto out; goto loop; } if (ISSET(tp->t_lflag, FLUSHO) || tp->t_outq.c_cc > hiwat) break; } ttstart(tp); } out: /* * If cc is nonzero, we leave the uio structure inconsistent, as the * offset and iov pointers have moved forward, but it doesn't matter * (the call will either return short or restart with a new uio). */ uio->uio_resid += cc; return (error); ovhiwat: ttstart(tp); s = spltty(); /* * This can only occur if FLUSHO is set in t_lflag, * or if ttstart/oproc is synchronous (or very fast). */ if (tp->t_outq.c_cc <= hiwat) { splx(s); goto loop; } if (flag & IO_NDELAY) { splx(s); uio->uio_resid += cc; return (uio->uio_resid == cnt ? EWOULDBLOCK : 0); } SET(tp->t_state, TS_SO_OLOWAT); error = ttysleep(tp, TSA_OLOWAT(tp), TTOPRI | PCATCH, "ttywri", tp->t_timeout); splx(s); if (error == EWOULDBLOCK) error = EIO; if (error) goto out; goto loop; } /* * Rubout one character from the rawq of tp * as cleanly as possible. */ static void ttyrub(c, tp) register int c; register struct tty *tp; { register char *cp; register int savecol; int tabc, s; if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC)) return; CLR(tp->t_lflag, FLUSHO); if (ISSET(tp->t_lflag, ECHOE)) { if (tp->t_rocount == 0) { /* * Screwed by ttwrite; retype */ ttyretype(tp); return; } if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE)) ttyrubo(tp, 2); else { CLR(c, ~TTY_CHARMASK); switch (CCLASS(c)) { case ORDINARY: ttyrubo(tp, 1); break; case BACKSPACE: case CONTROL: case NEWLINE: case RETURN: case VTAB: if (ISSET(tp->t_lflag, ECHOCTL)) ttyrubo(tp, 2); break; case TAB: if (tp->t_rocount < tp->t_rawq.c_cc) { ttyretype(tp); return; } s = spltty(); savecol = tp->t_column; SET(tp->t_state, TS_CNTTB); SET(tp->t_lflag, FLUSHO); tp->t_column = tp->t_rocol; cp = tp->t_rawq.c_cf; if (cp) tabc = *cp; /* XXX FIX NEXTC */ for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc)) ttyecho(tabc, tp); CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_CNTTB); splx(s); /* savecol will now be length of the tab. */ savecol -= tp->t_column; tp->t_column += savecol; if (savecol > 8) savecol = 8; /* overflow screw */ while (--savecol >= 0) (void)ttyoutput('\b', tp); break; default: /* XXX */ #define PANICSTR "ttyrub: would panic c = %d, val = %d\n" (void)printf(PANICSTR, c, CCLASS(c)); #ifdef notdef panic(PANICSTR, c, CCLASS(c)); #endif } } } else if (ISSET(tp->t_lflag, ECHOPRT)) { if (!ISSET(tp->t_state, TS_ERASE)) { SET(tp->t_state, TS_ERASE); (void)ttyoutput('\\', tp); } ttyecho(c, tp); } else ttyecho(tp->t_cc[VERASE], tp); --tp->t_rocount; } /* * Back over cnt characters, erasing them. */ static void ttyrubo(tp, cnt) register struct tty *tp; int cnt; { while (cnt-- > 0) { (void)ttyoutput('\b', tp); (void)ttyoutput(' ', tp); (void)ttyoutput('\b', tp); } } /* * ttyretype -- * Reprint the rawq line. Note, it is assumed that c_cc has already * been checked. */ static void ttyretype(tp) register struct tty *tp; { register char *cp; int s, c; /* Echo the reprint character. */ if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE) ttyecho(tp->t_cc[VREPRINT], tp); (void)ttyoutput('\n', tp); /* * XXX * FIX: NEXTC IS BROKEN - DOESN'T CHECK QUOTE * BIT OF FIRST CHAR. */ s = spltty(); for (cp = tp->t_canq.c_cf, c = (cp != NULL ? *cp : 0); cp != NULL; cp = nextc(&tp->t_canq, cp, &c)) ttyecho(c, tp); for (cp = tp->t_rawq.c_cf, c = (cp != NULL ? 
*cp : 0); cp != NULL; cp = nextc(&tp->t_rawq, cp, &c)) ttyecho(c, tp); CLR(tp->t_state, TS_ERASE); splx(s); tp->t_rocount = tp->t_rawq.c_cc; tp->t_rocol = 0; } /* * Echo a typed character to the terminal. */ static void ttyecho(c, tp) register int c; register struct tty *tp; { if (!ISSET(tp->t_state, TS_CNTTB)) CLR(tp->t_lflag, FLUSHO); if ((!ISSET(tp->t_lflag, ECHO) && (c != '\n' || !ISSET(tp->t_lflag, ECHONL))) || ISSET(tp->t_lflag, EXTPROC)) return; if (ISSET(tp->t_lflag, ECHOCTL) && ((ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n') || ISSET(c, TTY_CHARMASK) == 0177)) { (void)ttyoutput('^', tp); CLR(c, ~TTY_CHARMASK); if (c == 0177) c = '?'; else c += 'A' - 1; } (void)ttyoutput(c, tp); } /* * Wake up any readers on a tty. */ void ttwakeup(tp) register struct tty *tp; { if (tp->t_rsel.si_pid != 0) selwakeup(&tp->t_rsel); - if (ISSET(tp->t_state, TS_ASYNC)) - pgsignal(tp->t_pgrp, SIGIO, 1); + if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL) + pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL)); wakeup(TSA_HUP_OR_INPUT(tp)); } /* * Wake up any writers on a tty. */ void ttwwakeup(tp) register struct tty *tp; { if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat) selwakeup(&tp->t_wsel); if (ISSET(tp->t_state, TS_BUSY | TS_SO_OCOMPLETE) == TS_SO_OCOMPLETE && tp->t_outq.c_cc == 0) { CLR(tp->t_state, TS_SO_OCOMPLETE); wakeup(TSA_OCOMPLETE(tp)); } if (ISSET(tp->t_state, TS_SO_OLOWAT) && tp->t_outq.c_cc <= tp->t_olowat) { CLR(tp->t_state, TS_SO_OLOWAT); wakeup(TSA_OLOWAT(tp)); } } /* * Look up a code for a specified speed in a conversion table; * used by drivers to map software speed values to hardware parameters. */ int ttspeedtab(speed, table) int speed; register struct speedtab *table; { for ( ; table->sp_speed != -1; table++) if (table->sp_speed == speed) return (table->sp_code); return (-1); } /* * Set input and output watermarks and buffer sizes. For input, the * high watermark is about one second's worth of input above empty, the * low watermark is slightly below high water, and the buffer size is a * driver-dependent amount above high water. For output, the watermarks * are near the ends of the buffer, with about 1 second's worth of output * between them. All this only applies to the standard line discipline. */ void ttsetwater(tp) struct tty *tp; { register int cps, ttmaxhiwat, x; /* Input. */ clist_alloc_cblocks(&tp->t_canq, TTYHOG, 512); switch (tp->t_ispeedwat) { case (speed_t)-1: cps = tp->t_ispeed / 10; break; case 0: /* * This case is for old drivers that don't know about * t_ispeedwat. Arrange for them to get the old buffer * sizes and watermarks. */ cps = TTYHOG - 2 * 256; tp->t_ififosize = 2 * 256; break; default: cps = tp->t_ispeedwat / 10; break; } tp->t_ihiwat = cps; tp->t_ilowat = 7 * cps / 8; x = cps + tp->t_ififosize; clist_alloc_cblocks(&tp->t_rawq, x, x); /* Output. */ switch (tp->t_ospeedwat) { case (speed_t)-1: cps = tp->t_ospeed / 10; ttmaxhiwat = 2 * TTMAXHIWAT; break; case 0: cps = tp->t_ospeed / 10; ttmaxhiwat = TTMAXHIWAT; break; default: cps = tp->t_ospeedwat / 10; ttmaxhiwat = 8 * TTMAXHIWAT; break; } #define CLAMP(x, h, l) ((x) > h ? h : ((x) < l) ? l : (x)) tp->t_olowat = x = CLAMP(cps / 2, TTMAXLOWAT, TTMINLOWAT); x += cps; x = CLAMP(x, ttmaxhiwat, TTMINHIWAT); /* XXX clamps are too magic */ tp->t_ohiwat = roundup(x, CBSIZE); /* XXX for compat */ x = imax(tp->t_ohiwat, TTMAXHIWAT); /* XXX for compat/safety */ x += OBUFSIZ + 100; clist_alloc_cblocks(&tp->t_outq, x, x); #undef CLAMP } /* * Report on state of foreground process group.
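* This runs when the status character (VSTATUS, ^T by default) is typed; * the single line of output looks roughly like this illustrative sample: * load: 0.15 cmd: cc 317 [running] 1.23u 0.45s 3% 1024k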
*/ void ttyinfo(tp) register struct tty *tp; { register struct proc *p, *pick; struct timeval utime, stime; int tmp; if (ttycheckoutq(tp,0) == 0) return; /* Print load average. */ tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT; ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100); if (tp->t_session == NULL) ttyprintf(tp, "not a controlling terminal\n"); else if (tp->t_pgrp == NULL) ttyprintf(tp, "no foreground process group\n"); else if ((p = tp->t_pgrp->pg_members.lh_first) == 0) ttyprintf(tp, "empty foreground process group\n"); else { /* Pick interesting process. */ for (pick = NULL; p != 0; p = p->p_pglist.le_next) if (proc_compare(pick, p)) pick = p; ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid, pick->p_stat == SRUN ? "running" : pick->p_wmesg ? pick->p_wmesg : "iowait"); calcru(pick, &utime, &stime, NULL); /* Print user time. */ ttyprintf(tp, "%ld.%02ldu ", utime.tv_sec, utime.tv_usec / 10000); /* Print system time. */ ttyprintf(tp, "%ld.%02lds ", stime.tv_sec, stime.tv_usec / 10000); #define pgtok(a) (((a) * PAGE_SIZE) / 1024) /* Print percentage cpu, resident set size. */ tmp = (pick->p_pctcpu * 10000 + FSCALE / 2) >> FSHIFT; ttyprintf(tp, "%d%% %ldk\n", tmp / 100, pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 : #ifdef pmap_resident_count (long)pgtok(pmap_resident_count(&pick->p_vmspace->vm_pmap)) #else (long)pgtok(pick->p_vmspace->vm_rssize) #endif ); } tp->t_rocount = 0; /* so pending input will be retyped if BS */ } /* * Returns 1 if p2 is "better" than p1 * * The algorithm for picking the "interesting" process is thus: * * 1) Only foreground processes are eligible - implied. * 2) Runnable processes are favored over anything else. The runner * with the highest cpu utilization is picked (p_estcpu). Ties are * broken by picking the highest pid. * 3) The sleeper with the shortest sleep time is next. With ties, * we pick out just "short-term" sleepers (P_SINTR == 0). * 4) Further ties are broken by picking the highest pid. */ #define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL)) #define TESTAB(a, b) ((a)<<1 | (b)) #define ONLYA 2 #define ONLYB 1 #define BOTH 3 static int proc_compare(p1, p2) register struct proc *p1, *p2; { if (p1 == NULL) return (1); /* * see if at least one of them is runnable */ switch (TESTAB(ISRUN(p1), ISRUN(p2))) { case ONLYA: return (0); case ONLYB: return (1); case BOTH: /* * tie - favor one with highest recent cpu utilization */ if (p2->p_estcpu > p1->p_estcpu) return (1); if (p1->p_estcpu > p2->p_estcpu) return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * weed out zombies */ switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) { case ONLYA: return (1); case ONLYB: return (0); case BOTH: return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * pick the one with the smallest sleep time */ if (p2->p_slptime > p1->p_slptime) return (0); if (p1->p_slptime > p2->p_slptime) return (1); /* * favor one sleeping in a non-interruptible sleep */ if (p1->p_flag & P_SINTR && (p2->p_flag & P_SINTR) == 0) return (1); if (p2->p_flag & P_SINTR && (p1->p_flag & P_SINTR) == 0) return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * Output char to tty; console putchar style. 
*/ int tputchar(c, tp) int c; struct tty *tp; { register int s; s = spltty(); if (!ISSET(tp->t_state, TS_CONNECTED)) { splx(s); return (-1); } if (c == '\n') (void)ttyoutput('\r', tp); (void)ttyoutput(c, tp); ttstart(tp); splx(s); return (0); } /* * Sleep on chan, returning ERESTART if tty changed while we napped and * returning any errors (e.g. EINTR/EWOULDBLOCK) reported by tsleep. If * the tty is revoked, restarting a pending call will redo validation done * at the start of the call. */ int ttysleep(tp, chan, pri, wmesg, timo) struct tty *tp; void *chan; int pri, timo; char *wmesg; { int error; int gen; gen = tp->t_gen; error = tsleep(chan, pri, wmesg, timo); if (error) return (error); return (tp->t_gen == gen ? 0 : ERESTART); } #ifdef notyet /* * XXX this is usable but not useful or used. Most tty drivers have * ifdefs for using ttymalloc() but assume a different interface. */ /* * Allocate a tty struct. Clists in the struct will be allocated by * ttyopen(). */ struct tty * ttymalloc() { struct tty *tp; tp = malloc(sizeof *tp, M_TTYS, M_WAITOK); bzero(tp, sizeof *tp); return (tp); } #endif #if 0 /* XXX not yet usable: session leader holds a ref (see kern_exit.c). */ /* * Free a tty struct. Clists in the struct should have been freed by * ttyclose(). */ void ttyfree(tp) struct tty *tp; { free(tp, M_TTYS); } #endif /* 0 */ Index: head/sys/kern/uipc_sockbuf.c =================================================================== --- head/sys/kern/uipc_sockbuf.c (revision 41085) +++ head/sys/kern/uipc_sockbuf.c (revision 41086) @@ -1,961 +1,957 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_socket2.c,v 1.39 1998/09/05 13:24:39 bde Exp $ + * $Id: uipc_socket2.c,v 1.40 1998/11/04 20:22:11 fenner Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Primitive routines for operating on sockets and socket buffers */ u_long sb_max = SB_MAX; /* XXX should be static */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the * active (originating) side is that soisconnecting() is * called during processing of connect() call, * resulting in an eventual call to soisconnected() if/when the * connection is established. When the connection is torn down * soisdisconnecting() is called during processing of disconnect() call, * and soisdisconnected() is called when the connection to the peer * is totally severed. The semantics of these routines are such that * connectionless protocols can call soisconnected() and soisdisconnected() * only, bypassing the in-progress calls when setting up a ``connection'' * takes no time. * * From the passive side, a socket is created with * two queues of sockets: so_incomp for connections in progress * and so_comp for connections already made and awaiting user acceptance. * As a protocol is preparing incoming connections, it creates a socket * structure queued on so_incomp by calling sonewconn(). When the connection * is established, soisconnected() is called, and transfers the * socket structure to so_comp, making it available to accept(). * * If a socket is closed with sockets on either * so_incomp or so_comp, these sockets are dropped. * * If higher level protocols are implemented in * the kernel, the wakeups done here will sometimes * cause software-interrupt process scheduling. */ void soisconnecting(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; } void soisconnected(so) register struct socket *so; { register struct socket *head = so->so_head; so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (head && (so->so_state & SS_INCOMP)) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_state &= ~SS_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; sorwakeup(head); wakeup_one(&head->so_timeo); } else { wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } } void soisdisconnecting(so) register struct socket *so; { so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } void soisdisconnected(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } /* * Return a random connection that hasn't been serviced yet and * is eligible for discard. There is a one in qlen chance that * we will return a null, saying that there are no droppable * requests. In this case, the protocol specific code should drop * the new request. This ensures fairness. * * This may be used in conjunction with protocol specific queue * congestion routines.
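* When its listen queue overflows, the TCP input path uses this along * roughly these lines (a sketch, not the exact code): * if ((sp = sodropablereq(head)) != NULL) * tcp_drop(sototcpcb(sp), ETIMEDOUT); * and otherwise drops the newly arrived request itself.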
*/ struct socket * sodropablereq(head) register struct socket *head; { register struct socket *so; unsigned int i, j, qlen; static int rnd; static struct timeval old_runtime; static unsigned int cur_cnt, old_cnt; struct timeval tv; getmicrouptime(&tv); if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { old_runtime = tv; old_cnt = cur_cnt / i; cur_cnt = 0; } so = TAILQ_FIRST(&head->so_incomp); if (!so) return (so); qlen = head->so_incqlen; if (++cur_cnt > qlen || old_cnt > qlen) { rnd = (314159 * rnd + 66329) & 0xffff; j = ((qlen + 1) * rnd) >> 16; while (j-- && so) so = TAILQ_NEXT(so, so_list); } return (so); } /* * When an attempt at a new connection is noted on a socket * which accepts connections, sonewconn is called. If the * connection is possible (subject to space constraints, etc.) * then we allocate a new structure, properly linked into the * data structure of the original socket, and return this. * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED. */ struct socket * sonewconn(head, connstatus) register struct socket *head; int connstatus; { register struct socket *so; if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); so = soalloc(0); if (so == NULL) return ((struct socket *)0); so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_proto = head->so_proto; so->so_timeo = head->so_timeo; - so->so_pgid = head->so_pgid; + fsetown(fgetown(head->so_sigio), &so->so_sigio); so->so_uid = head->so_uid; (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); return ((struct socket *)0); } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; } else { TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); so->so_state |= SS_INCOMP; head->so_incqlen++; } head->so_qlen++; if (connstatus) { sorwakeup(head); wakeup((caddr_t)&head->so_timeo); so->so_state |= connstatus; } return (so); } /* * Socantsendmore indicates that no more data will be sent on the * socket; it would normally be applied to a socket when the user * informs the system that no more data is to be sent, by the protocol * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data * will be received, and will normally be applied to the socket by a * protocol when it detects that the peer will send no more data. * Data queued for reading in the socket may yet be read. */ void socantsendmore(so) struct socket *so; { so->so_state |= SS_CANTSENDMORE; sowwakeup(so); } void socantrcvmore(so) struct socket *so; { so->so_state |= SS_CANTRCVMORE; sorwakeup(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(sb) struct sockbuf *sb; { sb->sb_flags |= SB_WAIT; return (tsleep((caddr_t)&sb->sb_cc, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo)); } /* * Lock a sockbuf already known to be locked; * return any error returned from sleep (EINTR). */ int sb_lock(sb) register struct sockbuf *sb; { int error; while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; error = tsleep((caddr_t)&sb->sb_flags, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, "sblock", 0); if (error) return (error); } sb->sb_flags |= SB_LOCK; return (0); } /* * Wakeup processes waiting on a socket buffer. * Do asynchronous notification via SIGIO * if the socket has the SS_ASYNC flag set.
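* The owner is now recorded in a struct sigio (so_sigio) rather than in * so_pgid. From userland the registration is unchanged; an illustrative * sequence for a socket s is: * fcntl(s, F_SETOWN, getpid()); * fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_ASYNC); * after which readiness changes deliver SIGIO via pgsigio() below.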
*/ void sowakeup(so, sb) register struct socket *so; register struct sockbuf *sb; { struct proc *p; selwakeup(&sb->sb_sel); sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup((caddr_t)&sb->sb_cc); } - if (so->so_state & SS_ASYNC) { - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGIO); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGIO); - } + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) + pgsigio(so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and * one for receiving data. Each buffer contains a queue of mbufs, * information about the number of mbufs and amount of data in the * queue, and other fields allowing select() statements and notification * on data availability to be implemented. * * Data stored in a socket buffer is maintained as a list of records. * Each record is a list of mbufs chained together with the m_next * field. Records are chained together with the m_nextpkt field. The upper * level routine soreceive() expects the following conventions to be * observed when placing information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's * name, then a record containing that name must be present before * any associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really * just additional data associated with the message), and there are * ``rights'' to be received, then a record containing this data * should be present (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by * a data record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space * should be released by calling sbrelease() when the socket is destroyed. */ int soreserve(so, sndcc, rcvcc) register struct socket *so; u_long sndcc, rcvcc; { if (sbreserve(&so->so_snd, sndcc) == 0) goto bad; if (sbreserve(&so->so_rcv, rcvcc) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; return (0); bad2: sbrelease(&so->so_snd); bad: return (ENOBUFS); } /* * Allot mbufs to a sockbuf. * Attempt to scale mbmax so that mbcnt doesn't become limiting * if buffering efficiency is near the normal case. */ int sbreserve(sb, cc) struct sockbuf *sb; u_long cc; { if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) return (0); sb->sb_hiwat = cc; sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); } /* * Free mbufs held by a socket, and reserved mbuf space. */ void sbrelease(sb) struct sockbuf *sb; { sbflush(sb); sb->sb_hiwat = sb->sb_mbmax = 0; } /* * Routines to add and remove * data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to * append new mbufs to a socket buffer, after checking that adequate * space is available, comparing the function sbspace() with the amount * of data to be added. 
sbappendrecord() differs from sbappend() in * that data supplied is treated as the beginning of a new record. * To place a sender's address, optional access rights, and data in a * socket receive buffer, sbappendaddr() should be used. To place * access rights and data in a socket receive buffer, sbappendcontrol() * should be used. In either case, the new data begins a new record. * Note that unlike sbappend() and sbappendrecord(), these routines check * for the caller that there will be enough space to store the data. * Each fails if there is not enough space, or if it cannot find mbufs * to store additional information in. * * Reliable protocols may use the socket send buffer to hold data * awaiting acknowledgement. Data is normally copied from a socket * send buffer in a protocol with m_copy for output to a peer, * and then removed from the socket buffer with sbdrop() * or sbdroprecord() when the data is acknowledged by the peer. */ /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated with * the mbuf chain is recorded in sb. Empty mbufs are * discarded and mbufs are compacted where possible. */ void sbappend(sb, m) struct sockbuf *sb; struct mbuf *m; { register struct mbuf *n; if (m == 0) return; n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } sbcompress(sb, m, n); } #ifdef SOCKBUF_DEBUG void sbcheck(sb) register struct sockbuf *sb; { register struct mbuf *m; register struct mbuf *n = 0; register u_long len = 0, mbcnt = 0; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { len += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } } #endif /* * As above, except the mbuf chain * begins a new record. */ void sbappendrecord(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; if (m0 == 0) return; m = sb->sb_mb; if (m) while (m->m_nextpkt) m = m->m_nextpkt; /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); if (m) m->m_nextpkt = m0; else sb->sb_mb = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. */ void sbinsertoob(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { m = *mp; again: switch (m->m_type) { case MT_OOBDATA: continue; /* WANT next train */ case MT_CONTROL: m = m->m_next; if (m) goto again; /* inspect THIS train further */ } break; } /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); m0->m_nextpkt = *mp; *mp = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs.
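* A datagram protocol's input path uses it roughly as follows (a sketch * modelled on udp_input(); the names are illustrative): * if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from, m, opts) == 0) * goto bad; * sorwakeup(so);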
*/ int sbappendaddr(sb, asa, m0, control) register struct sockbuf *sb; struct sockaddr *asa; struct mbuf *m0, *control; { register struct mbuf *m, *n; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); if (m0) space += m0->m_pkthdr.len; for (n = control; n; n = n->m_next) { space += n->m_len; if (n->m_next == 0) /* keep pointer to last control buf */ break; } if (space > sbspace(sb)) return (0); if (asa->sa_len > MLEN) return (0); MGET(m, M_DONTWAIT, MT_SONAME); if (m == 0) return (0); m->m_len = asa->sa_len; bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); if (n) n->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n; n = n->m_next) sballoc(sb, n); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = m; } else sb->sb_mb = m; return (1); } int sbappendcontrol(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { register struct mbuf *m, *n; int space = 0; if (control == 0) panic("sbappendcontrol"); for (m = control; ; m = m->m_next) { space += m->m_len; if (m->m_next == 0) break; } n = m; /* save pointer to last control buffer */ for (m = m0; m; m = m->m_next) space += m->m_len; if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ for (m = control; m; m = m->m_next) sballoc(sb, m); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = control; } else sb->sb_mb = control; return (1); } /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n * is null, the buffer is presumed empty. */ void sbcompress(sb, m, n) register struct sockbuf *sb; register struct mbuf *m, *n; { register int eor = 0; register struct mbuf *o; while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { m = m_free(m); continue; } if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && n->m_type == m->m_type) { bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_cc += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mb = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { if (n) n->m_flags |= eor; else printf("semi-panic: sbcompress\n"); } } /* * Free all mbufs in a sockbuf. * Check that all resources are reclaimed. */ void sbflush(sb) register struct sockbuf *sb; { if (sb->sb_flags & SB_LOCK) panic("sbflush: locked"); while (sb->sb_mbcnt && sb->sb_cc) sbdrop(sb, (int)sb->sb_cc); if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop(sb, len) register struct sockbuf *sb; register int len; { register struct mbuf *m, *mn; struct mbuf *next; next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { if (m == 0) { if (next == 0) panic("sbdrop"); m = next; next = m->m_nextpkt; continue; } if (m->m_len > len) { m->m_len -= len; m->m_data += len; sb->sb_cc -= len; break; } len -= m->m_len; sbfree(sb, m); MFREE(m, mn); m = mn; } while (m && m->m_len == 0) { sbfree(sb, m); MFREE(m, mn); m = mn; } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; } /* * Drop a record off the front of a sockbuf * and move the next record to the front. 
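* soreceive() uses this for atomic (PR_ATOMIC) protocols to discard the * unread remainder of a record, e.g. the tail of a datagram that did not * fit in the caller's buffer.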
*/ void sbdroprecord(sb) register struct sockbuf *sb; { register struct mbuf *m, *mn; m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); MFREE(m, mn); m = mn; } while (m); } } /* * Create a "control" mbuf containing the specified data * with the specified type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(p, size, type, level) caddr_t p; register int size; int type, level; { register struct cmsghdr *cp; struct mbuf *m; if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); /* XXX check size? */ (void)memcpy(CMSG_DATA(cp), p, size); size += sizeof(*cp); m->m_len = size; cp->cmsg_len = size; cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. */ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, struct proc *p) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } /* * This isn't really a ``null'' operation, but it's the default one * and doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * dup_sockaddr(sa, canwait) struct sockaddr *sa; int canwait; { struct sockaddr *sa2; MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, canwait ? M_WAITOK : M_NOWAIT); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Create an external-format (``xsocket'') structure using the information * in the kernel-format socket structure pointed to by so. This is done * to reduce the spew of irrelevant information over this interface, * to isolate user code from changes in the kernel structure, and * potentially to provide information-hiding if we decide that * some of this information should be hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; - xso->so_pgid = so->so_pgid; + xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_uid = so->so_uid; } /* * This does the same for sockbufs. Note that the xsockbuf structure, * since it is always embedded in a socket, does not include a self * pointer nor a length. We make this entry point public in case * some other mechanism needs it. 
*/ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_cc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, ""); Index: head/sys/kern/uipc_socket.c =================================================================== --- head/sys/kern/uipc_socket.c (revision 41085) +++ head/sys/kern/uipc_socket.c (revision 41086) @@ -1,1224 +1,1223 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 - * $Id: uipc_socket.c,v 1.44 1998/08/31 15:34:55 wollman Exp $ + * $Id: uipc_socket.c,v 1.45 1998/08/31 18:07:23 wollman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vm_zone *socket_zone; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); static int somaxconn = SOMAXCONN; SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, ""); /* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. * We don't implement `waitok' yet (see comments in uipc_domain.c). * Note that it would probably be better to allocate socket * and PCB at the same time, but I'm not convinced that all * the protocols can be easily modified to do this. */ struct socket * soalloc(waitok) int waitok; { struct socket *so; so = zalloci(socket_zone); if (so) { /* XXX race condition for reentrant kernel */ bzero(so, sizeof *so); so->so_gencnt = ++so_gencnt; so->so_zone = socket_zone; } return so; } int socreate(dom, aso, type, proto, p) int dom; struct socket **aso; register int type; int proto; struct proc *p; { register struct protosw *prp; register struct socket *so; register int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); so = soalloc(p != 0); if (so == 0) return (ENOBUFS); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; if (p != 0) so->so_uid = p->p_ucred->cr_uid; so->so_proto = prp; error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); return (error); } *aso = so; return (0); } int sobind(so, nam, p) struct socket *so; struct sockaddr *nam; struct proc *p; { int s = splnet(); int error; error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); splx(s); return (error); } void sodealloc(so) struct socket *so; { so->so_gencnt = ++so_gencnt; zfreei(so->so_zone, so); } int solisten(so, backlog, p) register struct socket *so; int backlog; struct proc *p; { int s, error; s = splnet(); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); if (error) { splx(s); return (error); } if (so->so_comp.tqh_first == NULL) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; splx(s); return (0); } void sofree(so) register struct socket *so; { struct socket *head = so->so_head; if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (head != NULL) { if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; } else if (so->so_state & SS_COMP) { TAILQ_REMOVE(&head->so_comp, so, so_list); } else { panic("sofree: not queued"); } head->so_qlen--; so->so_state &= ~(SS_INCOMP|SS_COMP); so->so_head = NULL; } sbrelease(&so->so_snd); sorflush(so); sodealloc(so); } /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. * Free socket when disconnect complete. 
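* As of this revision the close path also calls funsetown() first, so that * the socket's SIGIO registration is torn down before the last reference * goes away and no signal can be delivered to a stale owner.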
*/ int soclose(so) register struct socket *so; { int s = splnet(); /* conservative */ int error = 0; + funsetown(so->so_sigio); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp, *sonext; for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } } if (so->so_pcb == 0) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) goto drop; } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger); if (error) break; } } } drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; } discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; sofree(so); splx(s); return (error); } /* * Must be called at splnet... */ int soabort(so) struct socket *so; { return (*so->so_proto->pr_usrreqs->pru_abort)(so); } int soaccept(so, nam) register struct socket *so; struct sockaddr **nam; { int s = splnet(); int error; if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); splx(s); return (error); } int soconnect(so, nam, p) register struct socket *so; struct sockaddr *nam; struct proc *p; { int s; int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splnet(); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. * This allows user to disconnect by connecting to, e.g., * a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) error = EISCONN; else error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); splx(s); return (error); } int soconnect2(so1, so2) register struct socket *so1; struct socket *so2; { int s = splnet(); int error; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); splx(s); return (error); } int sodisconnect(so) register struct socket *so; { int s = splnet(); int error; if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto bad; } if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; goto bad; } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); bad: splx(s); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) /* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. * The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. * * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. 
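* An in-kernel caller typically wraps its buffer in a uio and lets sosend() * do the segmentation; a minimal sketch with error handling elided: * struct iovec iov; struct uio auio; * iov.iov_base = buf; iov.iov_len = len; * auio.uio_iov = &iov; auio.uio_iovcnt = 1; * auio.uio_offset = 0; auio.uio_resid = len; * auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; * auio.uio_procp = p; * error = sosend(so, (struct sockaddr *)0, &auio, (struct mbuf *)0, * (struct mbuf *)0, 0, p);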
*/ int sosend(so, addr, uio, top, control, flags, p) register struct socket *so; struct sockaddr *addr; struct uio *uio; struct mbuf *top; struct mbuf *control; int flags; struct proc *p; { struct mbuf **mp; register struct mbuf *m; register long space, len, resid; int clen = 0, error, s, dontroute, mlen; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (p) p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } restart: error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' are allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? ENOTCONN : EDESTADDRREQ); } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); if (space < resid + clen && uio && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) goto out; goto restart; } splx(s); mp = &top; space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (top == 0) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } if (resid >= MINCLSIZE) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; len = min(min(mlen, resid), space); } else { nopages: len = min(min(mlen, resid), space); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; s = splnet(); /* XXX */ error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol * understands this flag and there is nothing left to * send, then use PRU_SEND_EOF instead of PRU_SEND.
*/ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : 0, top, addr, control, p); splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top) m_freem(top); if (control) m_freem(control); return (error); } /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network interrupts for the entire time here, * we splx() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. */ int soreceive(so, psa, uio, mp0, controlp, flagsp) register struct socket *so; struct sockaddr **psa; struct uio *uio; struct mbuf **mp0; struct mbuf **controlp; int *flagsp; { register struct mbuf *m, **mp; register int flags, len, error, s, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; int orig_resid = uio->uio_resid; mp = mp0; if (psa) *psa = 0; if (controlp) *controlp = 0; if (flagsp) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, caddr_t), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m) m_freem(m); return (error); } if (mp) *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0); restart: error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); s = splnet(); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more * (subject to any timeout) if: * 1. the current count is less than the low water mark, or * 2. MSG_WAITALL is set, and it is possible to do the entire * receive operation at once if we block (resid <= hiwat). * 3. MSG_DONTWAIT is not set * If MSG_WAITALL is set but resid is larger than the receive buffer, * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. 
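[Aside: sosend()'s contract above says callers must check for short counts when EINTR/ERESTART interrupts a transfer. The conventional userland counterpart is a retry loop; a sketch under that contract, with the helper name invented here:]

#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

/*
 * send() can return a short count or fail with EINTR after moving
 * some data, so a persistent writer retries from the unsent offset.
 */
static ssize_t
send_all(int s, const char *buf, size_t len)
{
	size_t off = 0;
	ssize_t n;

	while (off < len) {
		n = send(s, buf + off, len - off, 0);
		if (n == -1) {
			if (errno == EINTR)
				continue;	/* signal: retry remainder */
			return (-1);		/* hard error */
		}
		off += (size_t)n;
	}
	return ((ssize_t)off);
}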
*/ if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { #ifdef DIAGNOSTIC if (m == 0 && so->so_rcv.sb_cc) panic("receive 1"); #endif if (so->so_error) { if (m) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { if (m) goto dontblock; else goto release; } for (; m; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { error = ENOTCONN; goto release; } if (uio->uio_resid == 0) goto release; if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { error = EWOULDBLOCK; goto release; } sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); if (error) return (error); goto restart; } dontblock: if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC if (m->m_type != MT_SONAME) panic("receive 1a"); #endif orig_resid = 0; if (psa) *psa = dup_sockaddr(mtod(m, struct sockaddr *), mp0 == 0); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } while (m && m->m_type == MT_CONTROL && error == 0) { if (flags & MSG_PEEK) { if (controlp) *controlp = m_copy(m, 0, m->m_len); m = m->m_next; } else { sbfree(&so->so_rcv, m); if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) error = (*pr->pr_domain->dom_externalize)(m); *controlp = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } if (controlp) { orig_resid = 0; controlp = &(*controlp)->m_next; } } if (m) { if ((flags & MSG_PEEK) == 0) m->m_nextpkt = nextrecord; type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; } else if (type == MT_OOBDATA) break; #ifdef DIAGNOSTIC else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) panic("receive 3"); #endif so->so_state &= ~SS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. * Otherwise copy them out via the uio, then free. * Sockbuf must be consistent here (points to current mbuf, * it points to next record) when we drop priority; * we must note any additions to the sockbuf when we * block interrupts again. 
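[Aside: the record layout soreceive() parses above — an optional MT_SONAME mbuf, then MT_CONTROL mbufs, then data — is exactly what recvmsg(2) exposes to userland. A hedged sketch of receiving one such record; struct sockaddr_storage is used only for generality and the helper name is invented:]

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/*
 * msg_name is filled from the MT_SONAME mbuf, msg_control from the
 * MT_CONTROL mbufs, and the iovec from the data mbufs; flags such
 * as MSG_TRUNC come back in msg_flags.
 */
static ssize_t
recv_record(int s, void *data, size_t datalen,
    struct sockaddr_storage *from, void *cbuf, size_t clen)
{
	struct msghdr msg;
	struct iovec iov;

	memset(&msg, 0, sizeof msg);
	iov.iov_base = data;
	iov.iov_len = datalen;
	msg.msg_name = from;
	msg.msg_namelen = sizeof *from;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = clen;
	return (recvmsg(s, &msg, 0));
}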
*/ if (mp == 0) { splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splnet(); if (error) goto release; } else uio->uio_resid -= len; if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp) { *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = (struct mbuf *)0; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } if (m) m->m_nextpkt = nextrecord; } } else { if (flags & MSG_PEEK) moff += len; else { if (mp) *mp = m_copym(m, 0, len, M_WAIT); m->m_data += len; m->m_len -= len; so->so_rcv.sb_cc -= len; } } if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_state |= SS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), * we must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return * with a short count but without error. * Keep sockbuf locked against other readers. */ while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); splx(s); return (0); } m = so->so_rcv.sb_mb; if (m) nextrecord = m->m_nextpkt; } } if (m && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == 0) so->so_rcv.sb_mb = nextrecord; if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(&so->so_rcv); splx(s); goto restart; } if (flagsp) *flagsp |= flags; release: sbunlock(&so->so_rcv); splx(s); return (error); } int soshutdown(so, how) register struct socket *so; register int how; { register struct protosw *pr = so->so_proto; how++; if (how & FREAD) sorflush(so); if (how & FWRITE) return ((*pr->pr_usrreqs->pru_shutdown)(so)); return (0); } void sorflush(so) register struct socket *so; { register struct sockbuf *sb = &so->so_rcv; register struct protosw *pr = so->so_proto; register int s; struct sockbuf asb; sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAITOK); s = splimp(); socantrcvmore(so); sbunlock(sb); asb = *sb; bzero((caddr_t)sb, sizeof (*sb)); splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease(&asb); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in * an additional variant to handle the case where the option value needs * to be some kind of integer, but not a specific size. * In addition to their use here, these functions are also called by the * protocol-level pr_ctloutput() routines. */ int sooptcopyin(sopt, buf, len, minlen) struct sockopt *sopt; void *buf; size_t len; size_t minlen; { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, * but if we don't get the minimum length the caller * wants, we return EINVAL. On success, sopt->sopt_valsize * is set to however much we actually retrieved. 
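[Aside: soshutdown() above maps the userland "how" argument through how++ onto the FREAD/FWRITE bits — 0, 1, 2 become 1 (read side), 2 (write side), 3 (both). A small sketch of the classic half-close that exercises the write-side path; the helper name is invented:]

#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

/*
 * Stop sending (pru_shutdown via FWRITE) but keep draining the
 * peer's data until it sees our FIN and closes in turn.
 */
static void
half_close(int s)
{
	char buf[512];
	ssize_t n;

	(void)shutdown(s, 1);	/* SHUT_WR: how++ == 2 == FWRITE */
	while ((n = read(s, buf, sizeof buf)) > 0)
		;		/* receive side stays open until EOF */
}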
*/ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_p != 0) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return 0; } int sosetopt(so, sopt) struct socket *so; struct sockopt *sopt; { int error, optval; struct linger l; struct timeval tv; short val; error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) return ((*so->so_proto->pr_ctloutput) (so, sopt)); error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for any of these * options, so disallow them. */ if (optval < 1) { error = EINVAL; goto bad; } switch (sopt->sopt_name) { case SO_SNDBUF: case SO_RCVBUF: if (sbreserve(sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv, (u_long) optval) == 0) { error = ENOBUFS; goto bad; } break; /* * Make sure the low-water is never greater than * the high-water. */ case SO_SNDLOWAT: so->so_snd.sb_lowat = (optval > so->so_snd.sb_hiwat) ? so->so_snd.sb_hiwat : optval; break; case SO_RCVLOWAT: so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; break; } break; case SO_SNDTIMEO: case SO_RCVTIMEO: error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec > SHRT_MAX / hz - hz) { error = EDOM; goto bad; } val = tv.tv_sec * hz + tv.tv_usec / tick; switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; default: error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { (void) ((*so->so_proto->pr_ctloutput) (so, sopt)); } } bad: return (error); } /* Helper routine for getsockopt */ int sooptcopyout(sopt, buf, len) struct sockopt *sopt; void *buf; size_t len; { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, * possibly truncated to fit in the user's buffer. * Traditional behavior is that we always tell the user * precisely how much we copied, rather than something useful * like the total amount we had available for her. * Note that this interface is not idempotent; the entire answer must * generated ahead of time. 
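[Aside: sosetopt() above converts the SO_SNDTIMEO/SO_RCVTIMEO timeval into scheduler ticks (tv_sec * hz + tv_usec / tick) and rejects anything that would overflow the short sb_timeo with EDOM rather than EINVAL. A userland sketch; the helper name is invented:]

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>

/*
 * The timeval set here must fit in the short sb_timeo once scaled
 * by hz, hence the SHRT_MAX / hz check in sosetopt().
 */
static int
set_rcv_timeout(int s, long sec, long usec)
{
	struct timeval tv;

	tv.tv_sec = sec;	/* too-large values fail with EDOM */
	tv.tv_usec = usec;
	return (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv));
}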
*/ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != 0) { if (sopt->sopt_p != 0) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return error; } int sogetopt(so, sopt) struct socket *so; struct sockopt *sopt; { int error, optval; struct linger l; struct timeval tv; error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) { return ((*so->so_proto->pr_ctloutput) (so, sopt)); } else return (ENOPROTOOPT); } else { switch (sopt->sopt_name) { case SO_LINGER: l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_TYPE: optval = so->so_type; goto integer; case SO_ERROR: optval = so->so_error; so->so_error = 0; goto integer; case SO_SNDBUF: optval = so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: optval = (sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); tv.tv_sec = optval / hz; tv.tv_usec = (optval % hz) * tick; error = sooptcopyout(sopt, &tv, sizeof tv); break; default: error = ENOPROTOOPT; break; } return (error); } } void sohasoutofband(so) register struct socket *so; { struct proc *p; - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGURG); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGURG); + if (so->so_sigio != NULL) + pgsigio(so->so_sigio, SIGURG, 0); selwakeup(&so->so_rcv.sb_sel); } int sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) { int revents = 0; int s = splnet(); if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(p, &so->so_rcv.sb_sel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(p, &so->so_snd.sb_sel); so->so_snd.sb_flags |= SB_SEL; } } splx(s); return (revents); } Index: head/sys/kern/uipc_socket2.c =================================================================== --- head/sys/kern/uipc_socket2.c (revision 41085) +++ head/sys/kern/uipc_socket2.c (revision 41086) @@ -1,961 +1,957 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
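[Aside: the sohasoutofband() hunk above is the core of this change — SIGURG is now posted through the shared sigio record via pgsigio() instead of the old so_pgid bookkeeping. The userland recipe is unchanged: claim ownership, catch SIGURG, pull the out-of-band byte. A hedged sketch with invented names:]

#include <sys/types.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static int urgent_fd;

/* Kept trivial for illustration; real handlers defer the work. */
static void
on_urg(int sig)
{
	char c;

	(void)sig;
	(void)recv(urgent_fd, &c, 1, MSG_OOB);	/* fetch the mark byte */
}

/*
 * The process must own the socket (F_SETOWN feeds fsetown()) or
 * pgsigio() has no one to deliver SIGURG to.
 */
static int
watch_oob(int s)
{
	urgent_fd = s;
	if (signal(SIGURG, on_urg) == SIG_ERR)
		return (-1);
	return (fcntl(s, F_SETOWN, getpid()));
}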
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_socket2.c,v 1.39 1998/09/05 13:24:39 bde Exp $ + * $Id: uipc_socket2.c,v 1.40 1998/11/04 20:22:11 fenner Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Primitive routines for operating on sockets and socket buffers */ u_long sb_max = SB_MAX; /* XXX should be static */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the * active (originating) side is that soisconnecting() is * called during processing of connect() call, * resulting in an eventual call to soisconnected() if/when the * connection is established. When the connection is torn down * soisdisconnecting() is called during processing of disconnect() call, * and soisdisconnected() is called when the connection to the peer * is totally severed. The semantics of these routines are such that * connectionless protocols can call soisconnected() and soisdisconnected() * only, bypassing the in-progress calls when setting up a ``connection'' * takes no time. * * From the passive side, a socket is created with * two queues of sockets: so_q0 for connections in progress * and so_q for connections already made and awaiting user acceptance. * As a protocol is preparing incoming connections, it creates a socket * structure queued on so_q0 by calling sonewconn(). When the connection * is established, soisconnected() is called, and transfers the * socket structure to so_q, making it available to accept(). * * If a socket is closed with sockets on either * so_q0 or so_q, these sockets are dropped. * * If higher level protocols are implemented in * the kernel, the wakeups done here will sometimes * cause software-interrupt process scheduling. 
*/ void soisconnecting(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; } void soisconnected(so) register struct socket *so; { register struct socket *head = so->so_head; so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (head && (so->so_state & SS_INCOMP)) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_state &= ~SS_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; sorwakeup(head); wakeup_one(&head->so_timeo); } else { wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } } void soisdisconnecting(so) register struct socket *so; { so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } void soisdisconnected(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } /* * Return a random connection that hasn't been serviced yet and * is eligible for discard. There is a one in qlen chance that * we will return a null, saying that there are no dropable * requests. In this case, the protocol specific code should drop * the new request. This insures fairness. * * This may be used in conjunction with protocol specific queue * congestion routines. */ struct socket * sodropablereq(head) register struct socket *head; { register struct socket *so; unsigned int i, j, qlen; static int rnd; static struct timeval old_runtime; static unsigned int cur_cnt, old_cnt; struct timeval tv; getmicrouptime(&tv); if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { old_runtime = tv; old_cnt = cur_cnt / i; cur_cnt = 0; } so = TAILQ_FIRST(&head->so_incomp); if (!so) return (so); qlen = head->so_incqlen; if (++cur_cnt > qlen || old_cnt > qlen) { rnd = (314159 * rnd + 66329) & 0xffff; j = ((qlen + 1) * rnd) >> 16; while (j-- && so) so = TAILQ_NEXT(so, so_list); } return (so); } /* * When an attempt at a new connection is noted on a socket * which accepts connections, sonewconn is called. If the * connection is possible (subject to space constraints, etc.) * then we allocate a new structure, propoerly linked into the * data structure of the original socket, and return this. * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 
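[Aside: sodropablereq() above picks its victim with a 16-bit linear congruential step scaled into [0, qlen]; landing past the end of the queue means "no victim, drop the new request", which is the one-in-qlen fairness property the comment describes. A self-contained restatement of just that arithmetic — not kernel code, and widened to unsigned so the multiply is well-defined:]

/*
 * Same LCG constants as sodropablereq(); returns an index in
 * [0, qlen], where qlen means "drop the incoming request instead".
 */
static unsigned int
pick_victim(unsigned int qlen, unsigned int *rnd)
{
	*rnd = (314159 * *rnd + 66329) & 0xffff;
	return (((qlen + 1) * *rnd) >> 16);
}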
*/ struct socket * sonewconn(head, connstatus) register struct socket *head; int connstatus; { register struct socket *so; if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); so = soalloc(0); if (so == NULL) return ((struct socket *)0); so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_proto = head->so_proto; so->so_timeo = head->so_timeo; - so->so_pgid = head->so_pgid; + fsetown(fgetown(head->so_sigio), &so->so_sigio); so->so_uid = head->so_uid; (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); return ((struct socket *)0); } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; } else { TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); so->so_state |= SS_INCOMP; head->so_incqlen++; } head->so_qlen++; if (connstatus) { sorwakeup(head); wakeup((caddr_t)&head->so_timeo); so->so_state |= connstatus; } return (so); } /* * Socantsendmore indicates that no more data will be sent on the * socket; it would normally be applied to a socket when the user * informs the system that no more data is to be sent, by the protocol * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data * will be received, and will normally be applied to the socket by a * protocol when it detects that the peer will send no more data. * Data queued for reading in the socket may yet be read. */ void socantsendmore(so) struct socket *so; { so->so_state |= SS_CANTSENDMORE; sowwakeup(so); } void socantrcvmore(so) struct socket *so; { so->so_state |= SS_CANTRCVMORE; sorwakeup(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(sb) struct sockbuf *sb; { sb->sb_flags |= SB_WAIT; return (tsleep((caddr_t)&sb->sb_cc, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo)); } /* * Lock a sockbuf already known to be locked; * return any error returned from sleep (EINTR). */ int sb_lock(sb) register struct sockbuf *sb; { int error; while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; error = tsleep((caddr_t)&sb->sb_flags, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, "sblock", 0); if (error) return (error); } sb->sb_flags |= SB_LOCK; return (0); } /* * Wakeup processes waiting on a socket buffer. * Do asynchronous notification via SIGIO * if the socket has the SS_ASYNC flag set. */ void sowakeup(so, sb) register struct socket *so; register struct sockbuf *sb; { struct proc *p; selwakeup(&sb->sb_sel); sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup((caddr_t)&sb->sb_cc); } - if (so->so_state & SS_ASYNC) { - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGIO); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGIO); - } + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) + pgsigio(so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and * one for receiving data. Each buffer contains a queue of mbufs, * information about the number of mbufs and amount of data in the * queue, and other fields allowing select() statements and notification * on data availability to be implemented. 
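[Aside: sowakeup() above now posts SIGIO through pgsigio() when SS_ASYNC is set, and sonewconn() inherits the sigio ownership with fsetown(fgetown(...)). From userland both halves are needed — owning the descriptor alone does nothing until async mode is switched on. A sketch with an invented helper name:]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * F_SETOWN feeds fsetown() (who gets SIGIO); FIOASYNC sets SS_ASYNC
 * so sowakeup() actually consults the sigio record.
 */
static int
enable_sigio(int s)
{
	int on = 1;

	if (fcntl(s, F_SETOWN, getpid()) == -1)
		return (-1);
	return (ioctl(s, FIOASYNC, &on));
}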
* * Data stored in a socket buffer is maintained as a list of records. * Each record is a list of mbufs chained together with the m_next * field. Records are chained together with the m_nextpkt field. The upper * level routine soreceive() expects the following conventions to be * observed when placing information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's * name, then a record containing that name must be present before * any associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really * just additional data associated with the message), and there are * ``rights'' to be received, then a record containing this data * should be present (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by * a data record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space * should be released by calling sbrelease() when the socket is destroyed. */ int soreserve(so, sndcc, rcvcc) register struct socket *so; u_long sndcc, rcvcc; { if (sbreserve(&so->so_snd, sndcc) == 0) goto bad; if (sbreserve(&so->so_rcv, rcvcc) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; return (0); bad2: sbrelease(&so->so_snd); bad: return (ENOBUFS); } /* * Allot mbufs to a sockbuf. * Attempt to scale mbmax so that mbcnt doesn't become limiting * if buffering efficiency is near the normal case. */ int sbreserve(sb, cc) struct sockbuf *sb; u_long cc; { if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) return (0); sb->sb_hiwat = cc; sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); } /* * Free mbufs held by a socket, and reserved mbuf space. */ void sbrelease(sb) struct sockbuf *sb; { sbflush(sb); sb->sb_hiwat = sb->sb_mbmax = 0; } /* * Routines to add and remove * data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to * append new mbufs to a socket buffer, after checking that adequate * space is available, comparing the function sbspace() with the amount * of data to be added. sbappendrecord() differs from sbappend() in * that data supplied is treated as the beginning of a new record. * To place a sender's address, optional access rights, and data in a * socket receive buffer, sbappendaddr() should be used. To place * access rights and data in a socket receive buffer, sbappendrights() * should be used. In either case, the new data begins a new record. * Note that unlike sbappend() and sbappendrecord(), these routines check * for the caller that there will be enough space to store the data. * Each fails if there is not enough space, or if it cannot find mbufs * to store additional information in. * * Reliable protocols may use the socket send buffer to hold data * awaiting acknowledgement. Data is normally copied from a socket * send buffer in a protocol with m_copy for output to a peer, * and then removing the data from the socket buffer with sbdrop() * or sbdroprecord() when the data is acknowledged by the peer. 
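[Aside: sbreserve() below caps any reservation at sb_max scaled by MCLBYTES / (MSIZE + MCLBYTES), charging for mbuf overhead. Purely illustrative arithmetic; MSIZE = 256 and MCLBYTES = 2048 are assumptions matching common configurations of the era — the real values come from <sys/param.h>:]

/*
 * The hard ceiling sbreserve() applies to sb_hiwat.
 * E.g. sb_max = 262144 yields a limit of 233016 bytes.
 */
static unsigned long
sb_reserve_limit(unsigned long sb_max_val)
{
	const unsigned long long msize = 256, mclbytes = 2048;

	return ((unsigned long)(sb_max_val * mclbytes / (msize + mclbytes)));
}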
*/ /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated * the mbuf chain is recorded in sb. Empty mbufs are * discarded and mbufs are compacted where possible. */ void sbappend(sb, m) struct sockbuf *sb; struct mbuf *m; { register struct mbuf *n; if (m == 0) return; n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } sbcompress(sb, m, n); } #ifdef SOCKBUF_DEBUG void sbcheck(sb) register struct sockbuf *sb; { register struct mbuf *m; register struct mbuf *n = 0; register u_long len = 0, mbcnt = 0; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { len += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } } #endif /* * As above, except the mbuf chain * begins a new record. */ void sbappendrecord(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; if (m0 == 0) return; m = sb->sb_mb; if (m) while (m->m_nextpkt) m = m->m_nextpkt; /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); if (m) m->m_nextpkt = m0; else sb->sb_mb = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. */ void sbinsertoob(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { m = *mp; again: switch (m->m_type) { case MT_OOBDATA: continue; /* WANT next train */ case MT_CONTROL: m = m->m_next; if (m) goto again; /* inspect THIS train further */ } break; } /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); m0->m_nextpkt = *mp; *mp = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs. 
*/ int sbappendaddr(sb, asa, m0, control) register struct sockbuf *sb; struct sockaddr *asa; struct mbuf *m0, *control; { register struct mbuf *m, *n; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); if (m0) space += m0->m_pkthdr.len; for (n = control; n; n = n->m_next) { space += n->m_len; if (n->m_next == 0) /* keep pointer to last control buf */ break; } if (space > sbspace(sb)) return (0); if (asa->sa_len > MLEN) return (0); MGET(m, M_DONTWAIT, MT_SONAME); if (m == 0) return (0); m->m_len = asa->sa_len; bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); if (n) n->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n; n = n->m_next) sballoc(sb, n); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = m; } else sb->sb_mb = m; return (1); } int sbappendcontrol(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { register struct mbuf *m, *n; int space = 0; if (control == 0) panic("sbappendcontrol"); for (m = control; ; m = m->m_next) { space += m->m_len; if (m->m_next == 0) break; } n = m; /* save pointer to last control buffer */ for (m = m0; m; m = m->m_next) space += m->m_len; if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ for (m = control; m; m = m->m_next) sballoc(sb, m); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = control; } else sb->sb_mb = control; return (1); } /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n * is null, the buffer is presumed empty. */ void sbcompress(sb, m, n) register struct sockbuf *sb; register struct mbuf *m, *n; { register int eor = 0; register struct mbuf *o; while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { m = m_free(m); continue; } if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && n->m_type == m->m_type) { bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_cc += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mb = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { if (n) n->m_flags |= eor; else printf("semi-panic: sbcompress\n"); } } /* * Free all mbufs in a sockbuf. * Check that all resources are reclaimed. */ void sbflush(sb) register struct sockbuf *sb; { if (sb->sb_flags & SB_LOCK) panic("sbflush: locked"); while (sb->sb_mbcnt && sb->sb_cc) sbdrop(sb, (int)sb->sb_cc); if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop(sb, len) register struct sockbuf *sb; register int len; { register struct mbuf *m, *mn; struct mbuf *next; next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { if (m == 0) { if (next == 0) panic("sbdrop"); m = next; next = m->m_nextpkt; continue; } if (m->m_len > len) { m->m_len -= len; m->m_data += len; sb->sb_cc -= len; break; } len -= m->m_len; sbfree(sb, m); MFREE(m, mn); m = mn; } while (m && m->m_len == 0) { sbfree(sb, m); MFREE(m, mn); m = mn; } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; } /* * Drop a record off the front of a sockbuf * and move the next record to the front. 
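[Aside: sbdrop() above trims len bytes off the front of a buffer chain, freeing mbufs it consumes entirely and advancing the first survivor in place (m_data += len). A standalone sketch of the same front-trim over toy node types — ownership of the payload storage is elided for brevity:]

#include <stdlib.h>

struct chunk {
	struct chunk *next;
	char *data;
	int len;
};

/*
 * Free whole nodes while len covers them (analogous to
 * sbfree() + MFREE()), then shave the partially consumed one.
 */
static struct chunk *
drop_front(struct chunk *head, int len)
{
	struct chunk *next;

	while (head != NULL && len >= head->len) {
		len -= head->len;
		next = head->next;
		free(head);
		head = next;
	}
	if (head != NULL && len > 0) {
		head->data += len;	/* like m_data += len */
		head->len -= len;
	}
	return (head);
}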
*/ void sbdroprecord(sb) register struct sockbuf *sb; { register struct mbuf *m, *mn; m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); MFREE(m, mn); m = mn; } while (m); } } /* * Create a "control" mbuf containing the specified data * with the specified type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(p, size, type, level) caddr_t p; register int size; int type, level; { register struct cmsghdr *cp; struct mbuf *m; if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); /* XXX check size? */ (void)memcpy(CMSG_DATA(cp), p, size); size += sizeof(*cp); m->m_len = size; cp->cmsg_len = size; cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. */ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, struct proc *p) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } /* * This isn't really a ``null'' operation, but it's the default one * and doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * dup_sockaddr(sa, canwait) struct sockaddr *sa; int canwait; { struct sockaddr *sa2; MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, canwait ? M_WAITOK : M_NOWAIT); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Create an external-format (``xsocket'') structure using the information * in the kernel-format socket structure pointed to by so. This is done * to reduce the spew of irrelevant information over this interface, * to isolate user code from changes in the kernel structure, and * potentially to provide information-hiding if we decide that * some of this information should be hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; - xso->so_pgid = so->so_pgid; + xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_uid = so->so_uid; } /* * This does the same for sockbufs. Note that the xsockbuf structure, * since it is always embedded in a socket, does not include a self * pointer nor a length. We make this entry point public in case * some other mechanism needs it. 
*/ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_cc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, ""); Index: head/sys/net/bpf.c =================================================================== --- head/sys/net/bpf.c (revision 41085) +++ head/sys/net/bpf.c (revision 41086) @@ -1,1311 +1,1314 @@ /* * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)bpf.c 8.2 (Berkeley) 3/28/94 * - * $Id: bpf.c,v 1.43 1998/10/04 23:04:48 alex Exp $ + * $Id: bpf.c,v 1.44 1998/10/08 00:32:08 alex Exp $ */ #include "bpfilter.h" #if NBPFILTER > 0 #ifndef __GNUC__ #define inline #else #define inline __inline #endif #include #include #include #include #include #include #include #include #include #include #include +#include #if defined(sparc) && BSD < 199103 #include #endif #include #include #include #include #include #include #include #include #include #include #include "opt_devfs.h" #ifdef DEVFS #include #endif /*DEVFS*/ /* * Older BSDs don't have kernel malloc. */ #if BSD < 199103 extern bcopy(); static caddr_t bpf_alloc(); #include #define BPF_BUFSIZE (MCLBYTES-8) #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio) #else #define BPF_BUFSIZE 4096 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio) #endif #define PRINET 26 /* interruptible */ /* * The default read buffer size is patchable. */ static int bpf_bufsize = BPF_BUFSIZE; SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW, &bpf_bufsize, 0, ""); /* * bpf_iflist is the list of interfaces; each corresponds to an ifnet * bpf_dtab holds the descriptors, indexed by minor device # */ static struct bpf_if *bpf_iflist; static struct bpf_d bpf_dtab[NBPFILTER]; static int bpf_dtab_init; static int bpf_allocbufs __P((struct bpf_d *)); static void bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp)); static void bpf_detachd __P((struct bpf_d *d)); static void bpf_freed __P((struct bpf_d *)); static void bpf_ifname __P((struct ifnet *, struct ifreq *)); static void bpf_mcopy __P((const void *, void *, size_t)); static int bpf_movein __P((struct uio *, int, struct mbuf **, struct sockaddr *, int *)); static int bpf_setif __P((struct bpf_d *, struct ifreq *)); static inline void bpf_wakeup __P((struct bpf_d *)); static void catchpacket __P((struct bpf_d *, u_char *, u_int, u_int, void (*)(const void *, void *, size_t))); static void reset_d __P((struct bpf_d *)); static int bpf_setf __P((struct bpf_d *, struct bpf_program *)); static d_open_t bpfopen; static d_close_t bpfclose; static d_read_t bpfread; static d_write_t bpfwrite; static d_ioctl_t bpfioctl; static d_poll_t bpfpoll; #define CDEV_MAJOR 23 static struct cdevsw bpf_cdevsw = { bpfopen, bpfclose, bpfread, bpfwrite, /*23*/ bpfioctl, nostop, nullreset, nodevtotty,/* bpf */ bpfpoll, nommap, NULL, "bpf", NULL, -1 }; static int bpf_movein(uio, linktype, mp, sockp, datlen) register struct uio *uio; int linktype, *datlen; register struct mbuf **mp; register struct sockaddr *sockp; { struct mbuf *m; int error; int len; int hlen; /* * Build a sockaddr based on the data link layer type. * We do this at this level because the ethernet header * is copied directly into the data field of the sockaddr. * In the case of SLIP, there is no header and the packet * is forwarded as is. * Also, we are careful to leave room at the front of the mbuf * for the link level header. */ switch (linktype) { case DLT_SLIP: sockp->sa_family = AF_INET; hlen = 0; break; case DLT_EN10MB: sockp->sa_family = AF_UNSPEC; /* XXX Would MAXLINKHDR be better? 
*/ hlen = sizeof(struct ether_header); break; case DLT_FDDI: #if defined(__FreeBSD__) || defined(__bsdi__) sockp->sa_family = AF_IMPLINK; hlen = 0; #else sockp->sa_family = AF_UNSPEC; /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */ hlen = 24; #endif break; case DLT_RAW: case DLT_NULL: sockp->sa_family = AF_UNSPEC; hlen = 0; break; #ifdef __FreeBSD__ case DLT_ATM_RFC1483: /* * en atm driver requires 4-byte atm pseudo header. * though it isn't standard, vpi:vci needs to be * specified anyway. */ sockp->sa_family = AF_UNSPEC; hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ break; #endif default: return (EIO); } len = uio->uio_resid; *datlen = len - hlen; if ((unsigned)len > MCLBYTES) return (EIO); MGETHDR(m, M_WAIT, MT_DATA); if (m == 0) return (ENOBUFS); if (len > MHLEN) { #if BSD >= 199103 MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { #else MCLGET(m); if (m->m_len != MCLBYTES) { #endif error = ENOBUFS; goto bad; } } m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; *mp = m; /* * Make room for link header. */ if (hlen != 0) { m->m_pkthdr.len -= hlen; m->m_len -= hlen; #if BSD >= 199103 m->m_data += hlen; /* XXX */ #else m->m_off += hlen; #endif error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio); if (error) goto bad; } error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio); if (!error) return (0); bad: m_freem(m); return (error); } /* * Attach file to the bpf interface, i.e. make d listen on bp. * Must be called at splimp. */ static void bpf_attachd(d, bp) struct bpf_d *d; struct bpf_if *bp; { /* * Point d at bp, and add d to the interface's list of listeners. * Finally, point the driver's bpf cookie at the interface so * it will divert packets to bpf. */ d->bd_bif = bp; d->bd_next = bp->bif_dlist; bp->bif_dlist = d; bp->bif_ifp->if_bpf = bp; } /* * Detach a file from its interface. */ static void bpf_detachd(d) struct bpf_d *d; { struct bpf_d **p; struct bpf_if *bp; bp = d->bd_bif; /* * Check if this descriptor had requested promiscuous mode. * If so, turn it off. */ if (d->bd_promisc) { d->bd_promisc = 0; if (ifpromisc(bp->bif_ifp, 0)) /* * Something is really wrong if we were able to put * the driver into promiscuous mode, but can't * take it out. */ panic("bpf: ifpromisc failed"); } /* Remove d from the interface's descriptor list. */ p = &bp->bif_dlist; while (*p != d) { p = &(*p)->bd_next; if (*p == 0) panic("bpf_detachd: descriptor not in list"); } *p = (*p)->bd_next; if (bp->bif_dlist == 0) /* * Let the driver know that there are no more listeners. */ d->bd_bif->bif_ifp->if_bpf = 0; d->bd_bif = 0; } /* * Mark a descriptor free by making it point to itself. * This is probably cheaper than marking with a constant since * the address should be in a register anyway. */ #define D_ISFREE(d) ((d) == (d)->bd_next) #define D_MARKFREE(d) ((d)->bd_next = (d)) #define D_MARKUSED(d) ((d)->bd_next = 0) /* * Open ethernet device. Returns ENXIO for illegal minor device number, * EBUSY if file is open by another process. */ /* ARGSUSED */ static int bpfopen(dev, flags, fmt, p) dev_t dev; int flags; int fmt; struct proc *p; { register struct bpf_d *d; if (minor(dev) >= NBPFILTER) return (ENXIO); /* * Each minor can be opened by only one process. If the requested * minor is in use, return EBUSY. */ d = &bpf_dtab[minor(dev)]; if (!D_ISFREE(d)) return (EBUSY); /* Mark "free" and do most initialization. 
*/ bzero((char *)d, sizeof(*d)); d->bd_bufsize = bpf_bufsize; d->bd_sig = SIGIO; return (0); } /* * Close the descriptor by detaching it from its interface, * deallocating its buffers, and marking it free. */ /* ARGSUSED */ static int bpfclose(dev, flags, fmt, p) dev_t dev; int flags; int fmt; struct proc *p; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; register int s; + funsetown(d->bd_sigio); s = splimp(); if (d->bd_bif) bpf_detachd(d); splx(s); bpf_freed(d); return (0); } /* * Support for SunOS, which does not have tsleep. */ #if BSD < 199103 static bpf_timeout(arg) caddr_t arg; { struct bpf_d *d = (struct bpf_d *)arg; d->bd_timedout = 1; wakeup(arg); } #define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan) int bpf_sleep(d) register struct bpf_d *d; { register int rto = d->bd_rtout; register int st; if (rto != 0) { d->bd_timedout = 0; timeout(bpf_timeout, (caddr_t)d, rto); } st = sleep((caddr_t)d, PRINET|PCATCH); if (rto != 0) { if (d->bd_timedout == 0) untimeout(bpf_timeout, (caddr_t)d); else if (st == 0) return EWOULDBLOCK; } return (st != 0) ? EINTR : 0; } #else #define BPF_SLEEP tsleep #endif /* * Rotate the packet buffers in descriptor d. Move the store buffer * into the hold slot, and the free buffer into the store slot. * Zero the length of the new store buffer. */ #define ROTATE_BUFFERS(d) \ (d)->bd_hbuf = (d)->bd_sbuf; \ (d)->bd_hlen = (d)->bd_slen; \ (d)->bd_sbuf = (d)->bd_fbuf; \ (d)->bd_slen = 0; \ (d)->bd_fbuf = 0; /* * bpfread - read next chunk of packets from buffers */ static int bpfread(dev, uio, ioflag) dev_t dev; register struct uio *uio; int ioflag; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; int error; int s; /* * Restrict application to use a buffer the same size as * as kernel buffers. */ if (uio->uio_resid != d->bd_bufsize) return (EINVAL); s = splimp(); /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets * have arrived to fill the store buffer. */ while (d->bd_hbuf == 0) { if (d->bd_immediate && d->bd_slen != 0) { /* * A packet(s) either arrived since the previous * read or arrived while we were asleep. * Rotate the buffers and return what's here. */ ROTATE_BUFFERS(d); break; } if (ioflag & IO_NDELAY) error = EWOULDBLOCK; else error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { splx(s); return (error); } if (error == EWOULDBLOCK) { /* * On a timeout, return what's in the buffer, * which may be nothing. If there is something * in the store buffer, we can rotate the buffers. */ if (d->bd_hbuf) /* * We filled up the buffer in between * getting the timeout and arriving * here, so we don't need to rotate. */ break; if (d->bd_slen == 0) { splx(s); return (0); } ROTATE_BUFFERS(d); break; } } /* * At this point, we know we have something in the hold slot. */ splx(s); /* * Move data from hold buffer into user space. * We know the entire buffer is transferred since * we checked above that the read buffer is bpf_bufsize bytes. */ error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); s = splimp(); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = 0; d->bd_hlen = 0; splx(s); return (error); } /* * If there are processes sleeping on this descriptor, wake them up. 
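[Aside: bpfread() above hands back a whole hold buffer at a time and rejects reads that are not exactly bd_bufsize with EINVAL, so readers must size their buffer via BIOCGBLEN and then walk the BPF_WORDALIGN'd headers inside each read. A hedged userland sketch; the function and callback names are invented:]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * One read() returns zero or more packets, each prefixed by a
 * struct bpf_hdr and padded out to a BPF_WORDALIGN boundary.
 */
static int
drain_bpf(int fd, void (*deliver)(const u_char *, u_int))
{
	struct bpf_hdr *bh;
	u_int blen;
	ssize_t n;
	char *buf, *p;

	if (ioctl(fd, BIOCGBLEN, &blen) == -1)
		return (-1);
	if ((buf = malloc(blen)) == NULL)
		return (-1);
	while ((n = read(fd, buf, blen)) > 0) {
		for (p = buf; p < buf + n;
		    p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen)) {
			bh = (struct bpf_hdr *)p;
			deliver((u_char *)p + bh->bh_hdrlen, bh->bh_caplen);
		}
	}
	free(buf);
	return (n == 0 ? 0 : -1);
}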
*/ static inline void bpf_wakeup(d) register struct bpf_d *d; { struct proc *p; wakeup((caddr_t)d); - if (d->bd_async && d->bd_sig) - if (d->bd_pgid > 0) - gsignal (d->bd_pgid, d->bd_sig); - else if (p = pfind (-d->bd_pgid)) - psignal (p, d->bd_sig); + if (d->bd_async && d->bd_sig && d->bd_sigio) + pgsigio(d->bd_sigio, d->bd_sig, 0); #if BSD >= 199103 selwakeup(&d->bd_sel); /* XXX */ d->bd_sel.si_pid = 0; #else if (d->bd_selproc) { selwakeup(d->bd_selproc, (int)d->bd_selcoll); d->bd_selcoll = 0; d->bd_selproc = 0; } #endif } static int bpfwrite(dev, uio, ioflag) dev_t dev; struct uio *uio; int ioflag; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; struct ifnet *ifp; struct mbuf *m; int error, s; static struct sockaddr dst; int datlen; if (d->bd_bif == 0) return (ENXIO); ifp = d->bd_bif->bif_ifp; if (uio->uio_resid == 0) return (0); error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen); if (error) return (error); if (datlen > ifp->if_mtu) return (EMSGSIZE); s = splnet(); #if BSD >= 199103 error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0); #else error = (*ifp->if_output)(ifp, m, &dst); #endif splx(s); /* * The driver frees the mbuf. */ return (error); } /* * Reset a descriptor by flushing its packet buffer and clearing the * receive and drop counts. Should be called at splimp. */ static void reset_d(d) struct bpf_d *d; { if (d->bd_hbuf) { /* Free the hold buffer. */ d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = 0; } d->bd_slen = 0; d->bd_hlen = 0; d->bd_rcount = 0; d->bd_dcount = 0; } /* * FIONREAD Check for read packet available. * SIOCGIFADDR Get interface address - convenient hook to driver. * BIOCGBLEN Get buffer len [for read()]. * BIOCSETF Set ethernet read filter. * BIOCFLUSH Flush read packet buffer. * BIOCPROMISC Put interface into promiscuous mode. * BIOCGDLT Get link layer type. * BIOCGETIF Get interface name. * BIOCSETIF Set interface. * BIOCSRTIMEOUT Set read timeout. * BIOCGRTIMEOUT Get read timeout. * BIOCGSTATS Get packet stats. * BIOCIMMEDIATE Set immediate mode. * BIOCVERSION Get filter language version. */ /* ARGSUSED */ static int bpfioctl(dev, cmd, addr, flags, p) dev_t dev; u_long cmd; caddr_t addr; int flags; struct proc *p; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; int s, error = 0; switch (cmd) { default: error = EINVAL; break; /* * Check for read packet available. */ case FIONREAD: { int n; s = splimp(); n = d->bd_slen; if (d->bd_hbuf) n += d->bd_hlen; splx(s); *(int *)addr = n; break; } case SIOCGIFADDR: { struct ifnet *ifp; if (d->bd_bif == 0) error = EINVAL; else { ifp = d->bd_bif->bif_ifp; error = (*ifp->if_ioctl)(ifp, cmd, addr); } break; } /* * Get buffer len [for read()]. */ case BIOCGBLEN: *(u_int *)addr = d->bd_bufsize; break; /* * Set buffer length. */ case BIOCSBLEN: #if BSD < 199103 error = EINVAL; #else if (d->bd_bif != 0) error = EINVAL; else { register u_int size = *(u_int *)addr; if (size > BPF_MAXBUFSIZE) *(u_int *)addr = size = BPF_MAXBUFSIZE; else if (size < BPF_MINBUFSIZE) *(u_int *)addr = size = BPF_MINBUFSIZE; d->bd_bufsize = size; } #endif break; /* * Set link layer read filter. */ case BIOCSETF: error = bpf_setf(d, (struct bpf_program *)addr); break; /* * Flush read packet buffer. */ case BIOCFLUSH: s = splimp(); reset_d(d); splx(s); break; /* * Put interface into promiscuous mode. */ case BIOCPROMISC: if (d->bd_bif == 0) { /* * No interface attached yet. 
*/ error = EINVAL; break; } s = splimp(); if (d->bd_promisc == 0) { error = ifpromisc(d->bd_bif->bif_ifp, 1); if (error == 0) d->bd_promisc = 1; } splx(s); break; /* * Get device parameters. */ case BIOCGDLT: if (d->bd_bif == 0) error = EINVAL; else *(u_int *)addr = d->bd_bif->bif_dlt; break; /* * Set interface name. */ case BIOCGETIF: if (d->bd_bif == 0) error = EINVAL; else bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr); break; /* * Set interface. */ case BIOCSETIF: error = bpf_setif(d, (struct ifreq *)addr); break; /* * Set read timeout. */ case BIOCSRTIMEOUT: { struct timeval *tv = (struct timeval *)addr; /* * Subtract 1 tick from tvtohz() since this isn't * a one-shot timer. */ if ((error = itimerfix(tv)) == 0) d->bd_rtout = tvtohz(tv) - 1; break; } /* * Get read timeout. */ case BIOCGRTIMEOUT: { struct timeval *tv = (struct timeval *)addr; tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; break; } /* * Get packet stats. */ case BIOCGSTATS: { struct bpf_stat *bs = (struct bpf_stat *)addr; bs->bs_recv = d->bd_rcount; bs->bs_drop = d->bd_dcount; break; } /* * Set immediate mode. */ case BIOCIMMEDIATE: d->bd_immediate = *(u_int *)addr; break; case BIOCVERSION: { struct bpf_version *bv = (struct bpf_version *)addr; bv->bv_major = BPF_MAJOR_VERSION; bv->bv_minor = BPF_MINOR_VERSION; break; } case FIONBIO: /* Non-blocking I/O */ break; case FIOASYNC: /* Send signal on receive packets */ d->bd_async = *(int *)addr; break; -/* N.B. ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing the - equivalent of a TIOCSPGRP and hence end up here. *However* TIOCSPGRP's arg - is a process group if it's positive and a process id if it's negative. This - is exactly the opposite of what the other two functions want! Therefore - there is code in ioctl and fcntl to negate the arg before calling here. */ + case FIOSETOWN: + error = fsetown(*(int *)addr, &d->bd_sigio); + break; - case TIOCSPGRP: /* Process or group to send signals to */ - d->bd_pgid = *(int *)addr; + case FIOGETOWN: + *(int *)addr = fgetown(d->bd_sigio); break; + /* This is deprecated, FIOSETOWN should be used instead. */ + case TIOCSPGRP: + error = fsetown(-(*(int *)addr), &d->bd_sigio); + break; + + /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: - *(int *)addr = d->bd_pgid; + *(int *)addr = -fgetown(d->bd_sigio); break; case BIOCSRSIG: /* Set receive signal */ { u_int sig; sig = *(u_int *)addr; if (sig >= NSIG) error = EINVAL; else d->bd_sig = sig; break; } case BIOCGRSIG: *(u_int *)addr = d->bd_sig; break; } return (error); } /* * Set d's packet filter program to fp. If this file already has a filter, * free it and replace it. Returns EINVAL for bogus requests. 
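[Aside: the replaced hunk above centralizes the ownership sign convention in fsetown(): FIOSETOWN/F_SETOWN take a pid positive and a process group negative, while the deprecated TIOCSPGRP took the group positive — hence the explicit negation the driver now performs. A sketch of the two equivalent spellings, helper name invented:]

#include <sys/types.h>
#include <sys/ioctl.h>

/*
 * FIOSETOWN: negative arg means process group (pgrp 123 -> -123).
 * Deprecated equivalent: arg = pgrp; ioctl(fd, TIOCSPGRP, &arg);
 */
static int
own_bpf_pgrp(int fd, pid_t pgrp)
{
	int arg = -pgrp;

	return (ioctl(fd, FIOSETOWN, &arg));
}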
*/ static int bpf_setf(d, fp) struct bpf_d *d; struct bpf_program *fp; { struct bpf_insn *fcode, *old; u_int flen, size; int s; old = d->bd_filter; if (fp->bf_insns == 0) { if (fp->bf_len != 0) return (EINVAL); s = splimp(); d->bd_filter = 0; reset_d(d); splx(s); if (old != 0) free((caddr_t)old, M_DEVBUF); return (0); } flen = fp->bf_len; if (flen > BPF_MAXINSNS) return (EINVAL); size = flen * sizeof(*fp->bf_insns); fcode = (struct bpf_insn *)malloc(size, M_DEVBUF, M_WAITOK); if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && bpf_validate(fcode, (int)flen)) { s = splimp(); d->bd_filter = fcode; reset_d(d); splx(s); if (old != 0) free((caddr_t)old, M_DEVBUF); return (0); } free((caddr_t)fcode, M_DEVBUF); return (EINVAL); } /* * Detach a file from its current interface (if attached at all) and attach * to the interface indicated by the name stored in ifr. * Return an errno or 0. */ static int bpf_setif(d, ifr) struct bpf_d *d; struct ifreq *ifr; { struct bpf_if *bp; int s, error; struct ifnet *theywant; theywant = ifunit(ifr->ifr_name); if (theywant == 0) return ENXIO; /* * Look through attached interfaces for the named one. */ for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { struct ifnet *ifp = bp->bif_ifp; if (ifp == 0 || ifp != theywant) continue; /* * We found the requested interface. * If it's not up, return an error. * Allocate the packet buffers if we need to. * If we're already attached to requested interface, * just flush the buffer. */ if ((ifp->if_flags & IFF_UP) == 0) return (ENETDOWN); if (d->bd_sbuf == 0) { error = bpf_allocbufs(d); if (error != 0) return (error); } s = splimp(); if (bp != d->bd_bif) { if (d->bd_bif) /* * Detach if attached to something else. */ bpf_detachd(d); bpf_attachd(d, bp); } reset_d(d); splx(s); return (0); } /* Not found. */ return (ENXIO); } /* * Convert an interface name plus unit number of an ifp to a single * name which is returned in the ifr. */ static void bpf_ifname(ifp, ifr) struct ifnet *ifp; struct ifreq *ifr; { char *s = ifp->if_name; char *d = ifr->ifr_name; while (*d++ = *s++) continue; d--; /* back to the null */ /* XXX Assume that unit number is less than 10. */ *d++ = ifp->if_unit + '0'; *d = '\0'; } /* * Support for select() and poll() system calls * * Return true iff the specific operation will not block indefinitely. * Otherwise, return false but make a note that a selwakeup() must be done. */ int bpfpoll(dev, events, p) register dev_t dev; int events; struct proc *p; { register struct bpf_d *d; register int s; int revents = 0; /* * An imitation of the FIONREAD ioctl code. */ d = &bpf_dtab[minor(dev)]; s = splimp(); if (events & (POLLIN | POLLRDNORM)) if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &d->bd_sel); splx(s); return (revents); } /* * Incoming linkage from device drivers. Process the packet pkt, of length * pktlen, which is stored in a contiguous buffer. The packet is parsed * by each process' filter, and if accepted, stashed into the corresponding * buffer. */ void bpf_tap(ifp, pkt, pktlen) struct ifnet *ifp; register u_char *pkt; register u_int pktlen; { struct bpf_if *bp; register struct bpf_d *d; register u_int slen; /* * Note that the ipl does not have to be raised at this point. * The only problem that could arise here is that if two different * interfaces shared any data. This is not the case. 
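[Aside: bpf_setf() above copies the instruction array in and bpf_validate()s it before swapping filters. The smallest valid program is a single BPF_RET statement, which accepts every packet; a hedged userland sketch of installing it, helper name invented:]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

/*
 * A one-instruction accept-all filter: BPF_RET with k = (u_int)-1
 * captures every packet in full (catchpacket() clamps to snaplen
 * vs. pktlen anyway).  bf_len counts instructions, not bytes.
 */
static int
accept_all(int fd)
{
	static struct bpf_insn ins[] = {
		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
	};
	struct bpf_program prog;

	prog.bf_len = 1;
	prog.bf_insns = ins;
	return (ioctl(fd, BIOCSETF, &prog));	/* lands in bpf_setf() */
}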
*/ bp = ifp->if_bpf; for (d = bp->bif_dlist; d != 0; d = d->bd_next) { ++d->bd_rcount; slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen); if (slen != 0) catchpacket(d, pkt, pktlen, slen, bcopy); } } /* * Copy data from an mbuf chain into a buffer. This code is derived * from m_copydata in sys/uipc_mbuf.c. */ static void bpf_mcopy(src_arg, dst_arg, len) const void *src_arg; void *dst_arg; register size_t len; { register const struct mbuf *m; register u_int count; u_char *dst; m = src_arg; dst = dst_arg; while (len > 0) { if (m == 0) panic("bpf_mcopy"); count = min(m->m_len, len); bcopy(mtod(m, void *), dst, count); m = m->m_next; dst += count; len -= count; } } /* * Incoming linkage from device drivers, when packet is in an mbuf chain. */ void bpf_mtap(ifp, m) struct ifnet *ifp; struct mbuf *m; { struct bpf_if *bp = ifp->if_bpf; struct bpf_d *d; u_int pktlen, slen; struct mbuf *m0; pktlen = 0; for (m0 = m; m0 != 0; m0 = m0->m_next) pktlen += m0->m_len; for (d = bp->bif_dlist; d != 0; d = d->bd_next) { ++d->bd_rcount; slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0); if (slen != 0) catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy); } } /* * Move the packet data from interface memory (pkt) into the * store buffer, waking up any pending reads if a buffer fills. * "copy" is the routine called to do the actual data * transfer. bcopy is passed in to copy contiguous chunks, while * bpf_mcopy is passed in to copy mbuf chains. In the latter case, * pkt is really an mbuf. */ static void catchpacket(d, pkt, pktlen, snaplen, cpfn) register struct bpf_d *d; register u_char *pkt; register u_int pktlen, snaplen; register void (*cpfn) __P((const void *, void *, size_t)); { register struct bpf_hdr *hp; register int totlen, curlen; register int hdrlen = d->bd_bif->bif_hdrlen; /* * Figure out how many bytes to move. If the packet is * greater than or equal to the snapshot length, transfer that * much. Otherwise, transfer the whole packet (unless * we hit the buffer size limit). */ totlen = hdrlen + min(snaplen, pktlen); if (totlen > d->bd_bufsize) totlen = d->bd_bufsize; /* * Round up the end of the previous packet to the next longword. */ curlen = BPF_WORDALIGN(d->bd_slen); if (curlen + totlen > d->bd_bufsize) { /* * This packet will overflow the storage buffer. * Rotate the buffers if we can, then wake up any * pending reads. */ if (d->bd_fbuf == 0) { /* * We haven't completed the previous read yet, * so drop the packet. */ ++d->bd_dcount; return; } ROTATE_BUFFERS(d); bpf_wakeup(d); curlen = 0; } else if (d->bd_immediate) /* * Immediate mode is set. A packet arrived so any * reads should be woken up. */ bpf_wakeup(d); /* * Append the bpf header. */ hp = (struct bpf_hdr *)(d->bd_sbuf + curlen); #if BSD >= 199103 microtime(&hp->bh_tstamp); #elif defined(sun) uniqtime(&hp->bh_tstamp); #else hp->bh_tstamp = time; #endif hp->bh_datalen = pktlen; hp->bh_hdrlen = hdrlen; /* * Copy the packet data into the store buffer and update its length. */ (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen)); d->bd_slen = curlen + totlen; } /* * Initialize all nonzero fields of a descriptor.
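
The store/hold/free rotation in catchpacket() is easiest to see in isolation. A toy userland model of the per-packet decision (not kernel code; the buffers are reduced to tags and the wakeup is elided):

	#include <stdio.h>
	#include <stddef.h>

	struct toy_d {
		const char *sbuf, *hbuf, *fbuf;	/* store, hold, free slots */
		int slen;			/* bytes in the store buffer */
		int bufsize;
		int dcount;			/* dropped packets */
	};

	static void
	toy_catch(struct toy_d *d, int totlen)
	{
		if (d->slen + totlen > d->bufsize) {
			if (d->fbuf == NULL) {
				/* Reader still owns the hold buffer: drop. */
				d->dcount++;
				return;
			}
			/* ROTATE_BUFFERS: store -> hold, free -> store. */
			d->hbuf = d->sbuf;
			d->sbuf = d->fbuf;
			d->fbuf = NULL;	/* returned when the read completes */
			d->slen = 0;
			/* bpf_wakeup(d) would be posted here. */
		}
		d->slen += totlen;
	}

	int
	main(void)
	{
		struct toy_d d = { "A", NULL, "B", 0, 100, 0 };
		int i;

		for (i = 0; i < 4; i++)
			toy_catch(&d, 60);	/* 2nd packet rotates, later ones drop */
		printf("store=%s hold=%s drops=%d\n", d.sbuf, d.hbuf, d.dcount);
		return (0);
	}

The real code additionally longword-aligns the start of each packet with BPF_WORDALIGN() and, in immediate mode, wakes readers on every arrival rather than only on rotation.
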
*/ static int bpf_allocbufs(d) register struct bpf_d *d; { d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); if (d->bd_fbuf == 0) return (ENOBUFS); d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); if (d->bd_sbuf == 0) { free(d->bd_fbuf, M_DEVBUF); return (ENOBUFS); } d->bd_slen = 0; d->bd_hlen = 0; return (0); } /* * Free buffers currently in use by a descriptor. * Called on close. */ static void bpf_freed(d) register struct bpf_d *d; { /* * We don't need to lock out interrupts since this descriptor has * been detached from its interface and it yet hasn't been marked * free. */ if (d->bd_sbuf != 0) { free(d->bd_sbuf, M_DEVBUF); if (d->bd_hbuf != 0) free(d->bd_hbuf, M_DEVBUF); if (d->bd_fbuf != 0) free(d->bd_fbuf, M_DEVBUF); } if (d->bd_filter) free((caddr_t)d->bd_filter, M_DEVBUF); D_MARKFREE(d); } /* * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *) * in the driver's softc; dlt is the link layer type; hdrlen is the fixed * size of the link header (variable length headers not yet supported). */ void bpfattach(ifp, dlt, hdrlen) struct ifnet *ifp; u_int dlt, hdrlen; { struct bpf_if *bp; int i; bp = (struct bpf_if *)malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT); if (bp == 0) panic("bpfattach"); bp->bif_dlist = 0; bp->bif_ifp = ifp; bp->bif_dlt = dlt; bp->bif_next = bpf_iflist; bpf_iflist = bp; bp->bif_ifp->if_bpf = 0; /* * Compute the length of the bpf header. This is not necessarily * equal to SIZEOF_BPF_HDR because we want to insert spacing such * that the network layer header begins on a longword boundary (for * performance reasons and to alleviate alignment restrictions). */ bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; /* * Mark all the descriptors free if this hasn't been done. */ if (!bpf_dtab_init) { for (i = 0; i < NBPFILTER; ++i) D_MARKFREE(&bpf_dtab[i]); bpf_dtab_init = 1; } if (bootverbose) printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit); } #ifdef DEVFS static void *bpf_devfs_token[NBPFILTER]; #endif static int bpf_devsw_installed; static void bpf_drvinit __P((void *unused)); static void bpf_drvinit(unused) void *unused; { dev_t dev; #ifdef DEVFS int i; #endif if( ! bpf_devsw_installed ) { dev = makedev(CDEV_MAJOR, 0); cdevsw_add(&dev,&bpf_cdevsw, NULL); bpf_devsw_installed = 1; #ifdef DEVFS for ( i = 0 ; i < NBPFILTER ; i++ ) { bpf_devfs_token[i] = devfs_add_devswf(&bpf_cdevsw, i, DV_CHR, 0, 0, 0600, "bpf%d", i); } #endif } } SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL) #endif Index: head/sys/net/bpfdesc.h =================================================================== --- head/sys/net/bpfdesc.h (revision 41085) +++ head/sys/net/bpfdesc.h (revision 41086) @@ -1,103 +1,103 @@ /* * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93 * - * $Id$ + * $Id: bpfdesc.h,v 1.10 1997/02/22 09:40:57 peter Exp $ */ #ifndef _NET_BPFDESC_H_ #define _NET_BPFDESC_H_ #include /* * Descriptor associated with each open bpf file. */ struct bpf_d { struct bpf_d *bd_next; /* Linked list of descriptors */ /* * Buffer slots: two mbuf clusters buffer the incoming packets. * The model has three slots. Sbuf is always occupied. * sbuf (store) - Receive interrupt puts packets here. * hbuf (hold) - When sbuf is full, put cluster here and * wakeup read (replace sbuf with fbuf). * fbuf (free) - When read is done, put cluster here. * On receiving, if sbuf is full and fbuf is 0, packet is dropped. */ caddr_t bd_sbuf; /* store slot */ caddr_t bd_hbuf; /* hold slot */ caddr_t bd_fbuf; /* free slot */ int bd_slen; /* current length of store buffer */ int bd_hlen; /* current length of hold buffer */ int bd_bufsize; /* absolute length of buffers */ struct bpf_if * bd_bif; /* interface descriptor */ u_long bd_rtout; /* Read timeout in 'ticks' */ struct bpf_insn *bd_filter; /* filter code */ u_long bd_rcount; /* number of packets received */ u_long bd_dcount; /* number of packets dropped */ u_char bd_promisc; /* true if listening promiscuously */ u_char bd_state; /* idle, waiting, or timed out */ u_char bd_immediate; /* true to return on packet arrival */ int bd_async; /* non-zero if packet reception should generate signal */ int bd_sig; /* signal to send upon packet reception */ - pid_t bd_pgid; /* process or group id for signal */ + struct sigio * bd_sigio; /* information for SIGIO */ #if BSD < 199103 u_char bd_selcoll; /* true if selects collide */ int bd_timedout; struct proc * bd_selproc; /* process that last selected us */ #else u_char bd_pad; /* explicit alignment */ struct selinfo bd_sel; /* bsd select info */ #endif }; /* * Descriptor associated with each attached hardware interface. 
*/ struct bpf_if { struct bpf_if *bif_next; /* list of all interfaces */ struct bpf_d *bif_dlist; /* descriptor list */ u_int bif_dlt; /* link layer type */ u_int bif_hdrlen; /* length of header (with padding) */ struct ifnet *bif_ifp; /* corresponding interface */ }; #endif Index: head/sys/net/if_tun.c =================================================================== --- head/sys/net/if_tun.c (revision 41085) +++ head/sys/net/if_tun.c (revision 41086) @@ -1,638 +1,645 @@ /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ /* * Copyright (c) 1988, Julian Onions * Nottingham University 1987. * * This source may be freely distributed, however I would be interested * in any changes that are made. * * This driver takes packets off the IP i/f and hands them up to a * user process to have its wicked way with. This driver has its * roots in a similar driver written by Phil Cockcroft (formerly) at * UCL. This driver is based much more on read/write/poll mode of * operation though. */ #include "tun.h" #if NTUN > 0 #include "opt_devfs.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include +#include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ #include #include /* * XXX stop from including . doesn't * exist if we are an LKM. */ #undef KERNEL #include #define KERNEL #include #include #include #ifdef INET #include #include #endif #ifdef NS #include #include #endif #include "bpfilter.h" #if NBPFILTER > 0 #include #endif #include #include static void tunattach __P((void *)); PSEUDO_SET(tunattach, if_tun); #define TUNDEBUG if (tundebug) printf static int tundebug = 0; SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); static struct tun_softc tunctl[NTUN]; static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *rt)); static int tunifioctl __P((struct ifnet *, u_long, caddr_t)); static int tuninit __P((int)); static d_open_t tunopen; static d_close_t tunclose; static d_read_t tunread; static d_write_t tunwrite; static d_ioctl_t tunioctl; static d_poll_t tunpoll; #define CDEV_MAJOR 52 static struct cdevsw tun_cdevsw = { tunopen, tunclose, tunread, tunwrite, tunioctl, nullstop, noreset, nodevtotty, tunpoll, nommap, nostrategy, "tun", NULL, -1 }; static int tun_devsw_installed; #ifdef DEVFS static void *tun_devfs_token[NTUN]; #endif #define minor_val(n) ((((n) & ~0xff) << 8) | ((n) & 0xff)) #define dev_val(n) (((n) >> 8) | ((n) & 0xff)) static void tunattach(dummy) void *dummy; { register int i; struct ifnet *ifp; dev_t dev; if ( tun_devsw_installed ) return; dev = makedev(CDEV_MAJOR, 0); cdevsw_add(&dev, &tun_cdevsw, NULL); tun_devsw_installed = 1; for ( i = 0; i < NTUN; i++ ) { #ifdef DEVFS tun_devfs_token[i] = devfs_add_devswf(&tun_cdevsw, minor_val(i), DV_CHR, UID_UUCP, GID_DIALER, 0600, "tun%d", i); #endif tunctl[i].tun_flags = TUN_INITED; ifp = &tunctl[i].tun_if; ifp->if_unit = i; ifp->if_name = "tun"; ifp->if_mtu = TUNMTU; ifp->if_ioctl = tunifioctl; ifp->if_output = tunoutput; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); #if NBPFILTER > 0 bpfattach(ifp, DLT_NULL, sizeof(u_int)); #endif } } /* * tunnel open - must be superuser & the device must be * configured in */ static int tunopen(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { struct ifnet *ifp; struct tun_softc *tp; register int unit, error; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); if ((unit =
dev_val(minor(dev))) >= NTUN) return (ENXIO); tp = &tunctl[unit]; if (tp->tun_flags & TUN_OPEN) return EBUSY; ifp = &tp->tun_if; tp->tun_flags |= TUN_OPEN; TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit); return (0); } /* * tunclose - close the device - mark i/f down & delete * routing info */ static int tunclose(dev, foo, bar, p) dev_t dev; int foo; int bar; struct proc *p; { register int unit = dev_val(minor(dev)), s; struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; struct mbuf *m; tp->tun_flags &= ~TUN_OPEN; /* * junk all pending output */ do { s = splimp(); IF_DEQUEUE(&ifp->if_snd, m); splx(s); if (m) m_freem(m); } while (m); if (ifp->if_flags & IFF_UP) { s = splimp(); if_down(ifp); if (ifp->if_flags & IFF_RUNNING) { /* find internet addresses and delete routes */ register struct ifaddr *ifa; for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { if (ifa->ifa_addr->sa_family == AF_INET) { rtinit(ifa, (int)RTM_DELETE, tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); } } } splx(s); } - tp->tun_pgrp = 0; + funsetown(tp->tun_sigio); selwakeup(&tp->tun_rsel); TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit); return (0); } static int tuninit(unit) int unit; { struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; register struct ifaddr *ifa; TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit); ifp->if_flags |= IFF_UP | IFF_RUNNING; getmicrotime(&ifp->if_lastchange); for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { struct sockaddr_in *si; si = (struct sockaddr_in *)ifa->ifa_addr; if (si && si->sin_addr.s_addr) tp->tun_flags |= TUN_IASET; si = (struct sockaddr_in *)ifa->ifa_dstaddr; if (si && si->sin_addr.s_addr) tp->tun_flags |= TUN_DSTADDR; } #endif } return 0; } /* * Process an ioctl request. */ int tunifioctl(ifp, cmd, data) struct ifnet *ifp; u_long cmd; caddr_t data; { register struct ifreq *ifr = (struct ifreq *)data; int error = 0, s; s = splimp(); switch(cmd) { case SIOCSIFADDR: tuninit(ifp->if_unit); TUNDEBUG("%s%d: address set\n", ifp->if_name, ifp->if_unit); break; case SIOCSIFDSTADDR: tuninit(ifp->if_unit); TUNDEBUG("%s%d: destination address set\n", ifp->if_name, ifp->if_unit); break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; TUNDEBUG("%s%d: mtu set\n", ifp->if_name, ifp->if_unit); break; case SIOCADDMULTI: case SIOCDELMULTI: break; default: error = EINVAL; } splx(s); return (error); } /* * tunoutput - queue packets from higher level ready to put out. */ int tunoutput(ifp, m0, dst, rt) struct ifnet *ifp; struct mbuf *m0; struct sockaddr *dst; struct rtentry *rt; { struct tun_softc *tp = &tunctl[ifp->if_unit]; struct proc *p; int s; TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit); if ((tp->tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, ifp->if_unit, tp->tun_flags); m_freem (m0); return EHOSTDOWN; } #if NBPFILTER > 0 /* BPF write needs to be handled specially */ if (dst->sa_family == AF_UNSPEC) { dst->sa_family = *(mtod(m0, int *)); m0->m_len -= sizeof(int); m0->m_pkthdr.len -= sizeof(int); m0->m_data += sizeof(int); } if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer to it). 
*/ struct mbuf m; u_int af = dst->sa_family; m.m_next = m0; m.m_len = 4; m.m_data = (char *)&af; bpf_mtap(ifp, &m); } #endif switch(dst->sa_family) { #ifdef INET case AF_INET: s = splimp(); if (IF_QFULL(&ifp->if_snd)) { IF_DROP(&ifp->if_snd); m_freem(m0); splx(s); ifp->if_collisions++; return (ENOBUFS); } ifp->if_obytes += m0->m_pkthdr.len; IF_ENQUEUE(&ifp->if_snd, m0); splx(s); ifp->if_opackets++; break; #endif default: m_freem(m0); return EAFNOSUPPORT; } if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup((caddr_t)tp); } - if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) { - if (tp->tun_pgrp > 0) - gsignal(tp->tun_pgrp, SIGIO); - else if ((p = pfind(-tp->tun_pgrp)) != 0) - psignal(p, SIGIO); - } + if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) + pgsigio(tp->tun_sigio, SIGIO, 0); selwakeup(&tp->tun_rsel); return 0; } /* * the cdevsw interface is now pretty minimal. */ static int tunioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { int unit = dev_val(minor(dev)), s; struct tun_softc *tp = &tunctl[unit]; struct tuninfo *tunp; switch (cmd) { case TUNSIFINFO: tunp = (struct tuninfo *)data; tp->tun_if.if_mtu = tunp->mtu; tp->tun_if.if_type = tunp->type; tp->tun_if.if_baudrate = tunp->baudrate; break; case TUNGIFINFO: tunp = (struct tuninfo *)data; tunp->mtu = tp->tun_if.if_mtu; tunp->type = tp->tun_if.if_type; tunp->baudrate = tp->tun_if.if_baudrate; break; case TUNSDEBUG: tundebug = *(int *)data; break; case TUNGDEBUG: *(int *)data = tundebug; break; case FIONBIO: break; case FIOASYNC: if (*(int *)data) tp->tun_flags |= TUN_ASYNC; else tp->tun_flags &= ~TUN_ASYNC; break; case FIONREAD: s = splimp(); if (tp->tun_if.if_snd.ifq_head) { struct mbuf *mb = tp->tun_if.if_snd.ifq_head; for( *(int *)data = 0; mb != 0; mb = mb->m_next) *(int *)data += mb->m_len; } else *(int *)data = 0; splx(s); break; + case FIOSETOWN: + return (fsetown(*(int *)data, &tp->tun_sigio)); + + case FIOGETOWN: + *(int *)data = fgetown(tp->tun_sigio); + return (0); + + /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: - tp->tun_pgrp = *(int *)data; - break; + return (fsetown(-(*(int *)data), &tp->tun_sigio)); + + /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: - *(int *)data = tp->tun_pgrp; - break; + *(int *)data = -fgetown(tp->tun_sigio); + return (0); + default: return (ENOTTY); } return (0); } /* * The cdevsw read interface - reads a packet at a time, or at * least as much of a packet as can be read.
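
With tun_pgrp gone, the tun driver follows the same pattern as bpf: fsetown()/fgetown() behind FIOSETOWN/FIOGETOWN, negation for the deprecated TIOC pair, and a single pgsigio() call in tunoutput() instead of the old hand-rolled gsignal()/pfind()/psignal() dance. A minimal async-reader sketch (the /dev/tun0 path is illustrative, and the interface must already be up and addressed for packets to arrive):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/filio.h>
	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static int tunfd;

	static void
	sigio_handler(int sig)
	{
		char pkt[2048];
		long n;

		/* Each read returns at most one queued packet. */
		n = read(tunfd, pkt, sizeof(pkt));
		if (n > 0)
			write(STDOUT_FILENO, "packet\n", 7);
	}

	int
	main(void)
	{
		int on = 1, owner;

		tunfd = open("/dev/tun0", O_RDWR);	/* illustrative path */
		if (tunfd < 0) {
			perror("open");
			return (1);
		}
		signal(SIGIO, sigio_handler);

		owner = getpid();
		if (ioctl(tunfd, FIOSETOWN, &owner) < 0)
			perror("FIOSETOWN");
		if (ioctl(tunfd, FIOASYNC, &on) < 0)	/* sets TUN_ASYNC */
			perror("FIOASYNC");

		for (;;)
			pause();	/* tunoutput() raises SIGIO via pgsigio() */
		/* NOTREACHED */
	}

Old clients that still issue ioctl(fd, TIOCSPGRP, &pgrp) keep working because the handler negates the argument before calling fsetown(), so a positive TIOCSPGRP value still names a process group.
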
*/ static int tunread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int unit = dev_val(minor(dev)); struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; struct mbuf *m, *m0; int error=0, len, s; TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit); if ((tp->tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, ifp->if_unit, tp->tun_flags); return EHOSTDOWN; } tp->tun_flags &= ~TUN_RWAIT; s = splimp(); do { IF_DEQUEUE(&ifp->if_snd, m0); if (m0 == 0) { if (flag & IO_NDELAY) { splx(s); return EWOULDBLOCK; } tp->tun_flags |= TUN_RWAIT; if( error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1), "tunread", 0)) { splx(s); return error; } } } while (m0 == 0); splx(s); while (m0 && uio->uio_resid > 0 && error == 0) { len = min(uio->uio_resid, m0->m_len); if (len == 0) break; error = uiomove(mtod(m0, caddr_t), len, uio); MFREE(m0, m); m0 = m; } if (m0) { TUNDEBUG("Dropping mbuf\n"); m_freem(m0); } return error; } /* * the cdevsw write interface - an atomic write is a packet - or else! */ static int tunwrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int unit = dev_val(minor(dev)); struct ifnet *ifp = &tunctl[unit].tun_if; struct mbuf *top, **mp, *m; int error=0, s, tlen, mlen; TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit); if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) { TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit, uio->uio_resid); return EIO; } tlen = uio->uio_resid; /* get a header mbuf */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return ENOBUFS; mlen = MHLEN; top = 0; mp = &top; while (error == 0 && uio->uio_resid > 0) { m->m_len = min(mlen, uio->uio_resid); error = uiomove(mtod (m, caddr_t), m->m_len, uio); *mp = m; mp = &m->m_next; if (uio->uio_resid > 0) { MGET (m, M_DONTWAIT, MT_DATA); if (m == 0) { error = ENOBUFS; break; } mlen = MLEN; } } if (error) { if (top) m_freem (top); return error; } top->m_pkthdr.len = tlen; top->m_pkthdr.rcvif = ifp; #if NBPFILTER > 0 if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer to it). */ struct mbuf m; u_int af = AF_INET; m.m_next = top; m.m_len = 4; m.m_data = (char *)&af; bpf_mtap(ifp, &m); } #endif #ifdef INET s = splimp(); if (IF_QFULL (&ipintrq)) { IF_DROP(&ipintrq); splx(s); ifp->if_collisions++; m_freem(top); return ENOBUFS; } IF_ENQUEUE(&ipintrq, top); splx(s); ifp->if_ibytes += tlen; ifp->if_ipackets++; schednetisr(NETISR_IP); #endif return error; } /* * tunpoll - the poll interface; this is only really useful on reads. * The write detect always returns true; write never blocks * anyway, it either accepts the packet or drops it.
*/ static int tunpoll(dev, events, p) dev_t dev; int events; struct proc *p; { int unit = dev_val(minor(dev)), s; struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; int revents = 0; s = splimp(); TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit); if (events & (POLLIN | POLLRDNORM)) if (ifp->if_snd.ifq_len > 0) { TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name, ifp->if_unit, ifp->if_snd.ifq_len); revents |= events & (POLLIN | POLLRDNORM); } else { TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name, ifp->if_unit); selrecord(p, &tp->tun_rsel); } if (events & (POLLOUT | POLLWRNORM)) revents |= events & (POLLOUT | POLLWRNORM); splx(s); return (revents); } #endif /* NTUN */ Index: head/sys/net/if_tunvar.h =================================================================== --- head/sys/net/if_tunvar.h (revision 41085) +++ head/sys/net/if_tunvar.h (revision 41086) @@ -1,50 +1,50 @@ /*- * Copyright (c) 1998 Brian Somers * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id:$ + * $Id: if_tunvar.h,v 1.1 1998/01/11 17:52:33 brian Exp $ */ #ifndef _NET_IF_TUNVAR_H_ #define _NET_IF_TUNVAR_H_ struct tun_softc { u_short tun_flags; /* misc flags */ #define TUN_OPEN 0x0001 #define TUN_INITED 0x0002 #define TUN_RCOLL 0x0004 #define TUN_IASET 0x0008 #define TUN_DSTADDR 0x0010 #define TUN_RWAIT 0x0040 #define TUN_ASYNC 0x0080 #define TUN_READY (TUN_OPEN | TUN_INITED) struct ifnet tun_if; /* the interface */ - int tun_pgrp; /* the process group - if any */ + struct sigio *tun_sigio; /* information for SIGIO */ struct selinfo tun_rsel; /* read select */ struct selinfo tun_wsel; /* write select (not used) */ }; #endif /* !_NET_IF_TUNVAR_H_ */ Index: head/sys/sys/filedesc.h =================================================================== --- head/sys/sys/filedesc.h (revision 41085) +++ head/sys/sys/filedesc.h (revision 41086) @@ -1,114 +1,144 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)filedesc.h 8.1 (Berkeley) 6/2/93 - * $Id: filedesc.h,v 1.12 1997/10/12 20:25:57 phk Exp $ + * $Id: filedesc.h,v 1.13 1997/12/05 18:58:10 bde Exp $ */ #ifndef _SYS_FILEDESC_H_ #define _SYS_FILEDESC_H_ +#include + /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. * * A process is initially started out with NDFILE descriptors stored within * this structure, selected to be enough for typical applications based on * the historical limit of 20 open files (and the usage of descriptors by * shells). If these descriptors are exhausted, a larger descriptor table * may be allocated, up to a process' resource limit; the internal arrays * are then unused. The initial expansion is set to NDEXTENT; each time * it runs out, it is doubled until the resource limit is reached. NDEXTENT * should be selected to be the biggest multiple of OFILESIZE (see below) * that will fit in a power-of-two sized piece of memory. */ #define NDFILE 20 #define NDEXTENT 50 /* 250 bytes in 256-byte alloc. */ struct filedesc { struct file **fd_ofiles; /* file structures for open files */ char *fd_ofileflags; /* per-process open file flags */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ int fd_nfiles; /* number of open files allocated */ u_short fd_lastfile; /* high-water mark of fd_ofiles */ u_short fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ u_short fd_refcnt; /* reference count */ }; /* * Basic allocation of descriptors: * one of the above, plus arrays for NDFILE descriptors. */ struct filedesc0 { struct filedesc fd_fd; /* * These arrays are used when the number of open files is * <= NDFILE, and are then pointed to by the pointers above. */ struct file *fd_dfiles[NDFILE]; char fd_dfileflags[NDFILE]; }; /* * Per-process open flags. */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ #define UF_MAPPED 0x02 /* mapped from device */ /* * Storage required per open file descriptor. 
*/ #define OFILESIZE (sizeof(struct file *) + sizeof(char)) +/* + * This structure holds the information needed to send a SIGIO or + * a SIGURG signal to a process or process group when new data arrives + * on a device or socket. The structure is placed on an SLIST belonging + * to the proc or pgrp so that the entire list may be revoked when the + * process exits or the process group disappears. + */ +struct sigio { + union { + struct proc *siu_proc; /* Process to receive SIGIO/SIGURG */ + struct pgrp *siu_pgrp; /* Process group to receive ... */ + } sio_u; + SLIST_ENTRY(sigio) sio_pgsigio; /* sigio's for process or group */ + struct sigio **sio_myref; /* location of the pointer that holds + * the reference to this structure */ + struct ucred *sio_ucred; /* Current credentials */ + uid_t sio_ruid; /* Real user id */ + pid_t sio_pgid; /* pgid for signals */ +}; +#define sio_proc sio_u.siu_proc +#define sio_pgrp sio_u.siu_pgrp + +SLIST_HEAD(sigiolst, sigio); + #ifdef KERNEL /* * Kernel global variables and routines. */ int dupfdopen __P((struct filedesc *, int, int, int, int)); int fdalloc __P((struct proc *p, int want, int *result)); int fdavail __P((struct proc *p, int n)); int falloc __P((struct proc *p, struct file **resultfp, int *resultfd)); void ffree __P((struct file *)); struct filedesc *fdinit __P((struct proc *p)); struct filedesc *fdshare __P((struct proc *p)); struct filedesc *fdcopy __P((struct proc *p)); void fdfree __P((struct proc *p)); int closef __P((struct file *fp,struct proc *p)); void fdcloseexec __P((struct proc *p)); int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); int fdissequential __P((struct file *)); void fdsequential __P((struct file *, int)); +pid_t fgetown __P((struct sigio *)); +int fsetown __P((pid_t, struct sigio **)); +void funsetown __P((struct sigio *)); +void funsetownlst __P((struct sigiolst *)); #endif #endif Index: head/sys/sys/pipe.h =================================================================== --- head/sys/sys/pipe.h (revision 41085) +++ head/sys/sys/pipe.h (revision 41086) @@ -1,115 +1,115 @@ /* * Copyright (c) 1996 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. This work was done expressly for inclusion into FreeBSD. Other use * is allowed if this notation is included. * 5. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: pipe.h,v 1.9 1997/04/09 16:53:45 bde Exp $ + * $Id: pipe.h,v 1.10 1998/03/26 20:53:26 phk Exp $ */ #ifndef _SYS_PIPE_H_ #define _SYS_PIPE_H_ #ifndef KERNEL #include /* for struct timeval */ #include /* for struct selinfo */ #include /* for vm_page_t */ #include /* for PAGE_SIZE */ #endif /* * Pipe buffer size, keep moderate in value, pipes take kva space.
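
The struct sigio above is the heart of this revision: instead of each driver caching a raw pid_t, fsetown() allocates one of these, records the owner's credentials, and links it onto the owning proc's or pgrp's sigiolst so funsetownlst() can revoke every registration when the process exits or the group disappears. The sign convention rides in sio_pgid. A userland model of just that convention (the _model names are hypothetical; the real kernel functions also take the SLIST reference and perform credential checks):

	#include <sys/types.h>
	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct sigio_model {
		pid_t sio_pgid;		/* pid if > 0, -pgid if < 0 */
	};

	static int
	fsetown_model(pid_t pgid, struct sigio_model **sigiop)
	{
		if (pgid == 0) {	/* funsetown()-style teardown */
			free(*sigiop);
			*sigiop = NULL;
			return (0);
		}
		if (*sigiop == NULL) {
			*sigiop = malloc(sizeof(**sigiop));
			if (*sigiop == NULL)
				return (ENOMEM);
		}
		(*sigiop)->sio_pgid = pgid;
		return (0);
	}

	static pid_t
	fgetown_model(struct sigio_model *sigio)
	{
		/* An unset owner reads back as 0. */
		return (sigio != NULL ? sigio->sio_pgid : 0);
	}

	int
	main(void)
	{
		struct sigio_model *sigio = NULL;

		fsetown_model(1234, &sigio);	/* FIOSETOWN: process 1234 */
		printf("owner=%d\n", (int)fgetown_model(sigio));

		fsetown_model(-5678, &sigio);	/* TIOCSPGRP 5678, negated */
		printf("pgrp=%d\n", (int)-fgetown_model(sigio));

		fsetown_model(0, &sigio);
		return (0);
	}

This is why the deprecated TIOCGPGRP handlers in bpf and tun return -fgetown(): negating twice restores the old "positive means process group" view without a second field.
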
*/ #ifndef PIPE_SIZE #define PIPE_SIZE 16384 #endif #ifndef BIG_PIPE_SIZE #define BIG_PIPE_SIZE (64*1024) #endif /* * PIPE_MINDIRECT MUST be smaller than PIPE_SIZE and MUST be bigger * than PIPE_BUF. */ #ifndef PIPE_MINDIRECT #define PIPE_MINDIRECT 8192 #endif #define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1) /* * Pipe buffer information. * Separate in, out, cnt are used to simplify calculations. * Buffered write is active when the buffer.cnt field is set. */ struct pipebuf { u_int cnt; /* number of chars currently in buffer */ u_int in; /* in pointer */ u_int out; /* out pointer */ u_int size; /* size of buffer */ caddr_t buffer; /* kva of buffer */ struct vm_object *object; /* VM object containing buffer */ }; /* * Information to support direct transfers between processes for pipes. */ struct pipemapping { vm_offset_t kva; /* kernel virtual address */ vm_size_t cnt; /* number of chars in buffer */ vm_size_t pos; /* current position of transfer */ int npages; /* number of pages */ vm_page_t ms[PIPENPAGES]; /* pages in source process */ }; /* * Bits in pipe_state. */ #define PIPE_ASYNC 0x004 /* Async? I/O. */ #define PIPE_WANTR 0x008 /* Reader wants some characters. */ #define PIPE_WANTW 0x010 /* Writer wants space to put characters. */ #define PIPE_WANT 0x020 /* Pipe is wanted to be run-down. */ #define PIPE_SEL 0x040 /* Pipe has a select active. */ #define PIPE_EOF 0x080 /* Pipe is in EOF condition. */ #define PIPE_LOCK 0x100 /* Process has exclusive access to pointers/data. */ #define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */ #define PIPE_DIRECTW 0x400 /* Pipe direct write active. */ #define PIPE_DIRECTOK 0x800 /* Direct mode ok. */ /* * Per-pipe data structure. * Two of these are linked together to produce bi-directional pipes. */ struct pipe { struct pipebuf pipe_buffer; /* data storage */ struct pipemapping pipe_map; /* pipe mapping for direct I/O */ struct selinfo pipe_sel; /* for compat with select */ struct timespec pipe_atime; /* time of last access */ struct timespec pipe_mtime; /* time of last modify */ struct timespec pipe_ctime; /* time of status change */ - int pipe_pgid; /* process/group for async I/O */ + struct sigio *pipe_sigio; /* information for SIGIO */ struct pipe *pipe_peer; /* link with other direction */ u_int pipe_state; /* pipe status info */ int pipe_busy; /* busy flag, mostly to handle rundown sanely */ }; #ifdef KERNEL int pipe_stat __P((struct pipe *pipe, struct stat *ub)); #endif #endif /* !_SYS_PIPE_H_ */ Index: head/sys/sys/proc.h =================================================================== --- head/sys/sys/proc.h (revision 41085) +++ head/sys/sys/proc.h (revision 41086) @@ -1,356 +1,359 @@ /*- * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 - * $Id: proc.h,v 1.58 1998/05/28 09:30:26 phk Exp $ + * $Id: proc.h,v 1.59 1998/11/09 15:08:04 truckman Exp $ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* Machine-dependent proc substruct. */ #include /* For struct callout_handle. */ #include /* For struct rtprio. */ #include /* For struct selinfo. */ #include #ifndef KERNEL #include /* For structs itimerval, timeval. */ #endif #include #include +#include /* For struct sigiolst */ /* * One structure allocated per session. */ struct session { int s_count; /* Ref cnt; pgrps in session. */ struct proc *s_leader; /* Session leader. */ struct vnode *s_ttyvp; /* Vnode of controlling terminal. */ struct tty *s_ttyp; /* Controlling terminal. */ pid_t s_sid; /* Session ID */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; /* Setlogin() name. */ }; /* * One structure allocated per process group. */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* Hash chain. */ LIST_HEAD(, proc) pg_members; /* Pointer to pgrp members. */ struct session *pg_session; /* Pointer to session. */ + struct sigiolst pg_sigiolst; /* List of sigio sources */ pid_t pg_id; /* Pgrp id. */ int pg_jobc; /* # procs qualifying pgrp for job control */ }; /* * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(PROC ONLY)" below, * which might be addressable only on a processor on which the process * is running. */ struct proc { TAILQ_ENTRY(proc) p_procq; /* run/sleep queue. */ LIST_ENTRY(proc) p_list; /* List of all processes. */ /* substructures: */ struct pcred *p_cred; /* Process owner's identity. */ struct filedesc *p_fd; /* Ptr to open files structure. */ struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */ struct plimit *p_limit; /* Process limits. 
*/ struct vm_object *p_upages_obj;/* Upages object */ struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */ #define p_ucred p_cred->pc_ucred #define p_rlimit p_limit->pl_rlimit int p_flag; /* P_* flags. */ char p_stat; /* S* process status. */ char p_pad1[3]; pid_t p_pid; /* Process identifier. */ LIST_ENTRY(proc) p_hash; /* Hash chain. */ LIST_ENTRY(proc) p_pglist; /* List of processes in pgrp. */ struct proc *p_pptr; /* Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* List of sibling processes. */ LIST_HEAD(, proc) p_children; /* Pointer to list of children. */ struct callout_handle p_ithandle; /* * Callout handle for scheduling * p_realtimer. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* Save parent pid during ptrace. XXX */ int p_dupfd; /* Sideways return value from fdopen. XXX */ struct vmspace *p_vmspace; /* Address space. */ /* scheduling */ u_int p_estcpu; /* Time averaged value of p_cpticks. */ int p_cpticks; /* Ticks of cpu time. */ fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ void *p_wchan; /* Sleep address. */ const char *p_wmesg; /* Reason for sleep. */ u_int p_swtime; /* Time swapped in or out. */ u_int p_slptime; /* Time since last blocked. */ struct itimerval p_realtimer; /* Alarm timer. */ u_int64_t p_runtime; /* Real time in microsec. */ struct timeval p_switchtime; /* When last scheduled */ u_quad_t p_uticks; /* Statclock hits in user mode. */ u_quad_t p_sticks; /* Statclock hits in system mode. */ u_quad_t p_iticks; /* Statclock hits processing intr. */ int p_traceflag; /* Kernel trace points. */ struct vnode *p_tracep; /* Trace to vnode. */ int p_siglist; /* Signals arrived but not delivered. */ struct vnode *p_textvp; /* Vnode of executable. */ char p_lock; /* Process lock (prevent swap) count. */ char p_oncpu; /* Which cpu we are on */ char p_lastcpu; /* Last cpu we were on */ char p_pad2; /* alignment */ short p_locks; /* DEBUG: lockmgr count of held locks */ short p_simple_locks; /* DEBUG: count of held simple locks */ unsigned int p_stops; /* procfs event bitmask */ unsigned int p_stype; /* procfs stop event type */ char p_step; /* procfs stop *once* flag */ unsigned char p_pfsflags; /* procfs flags */ char p_pad3[2]; /* padding for alignment */ register_t p_retval[2]; /* syscall aux returns */ + struct sigiolst p_sigiolst; /* List of sigio sources */ /* End area that is zeroed on creation. */ #define p_endzero p_startcopy /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_sigmask sigset_t p_sigmask; /* Current signal mask. */ sigset_t p_sigignore; /* Signals being ignored. */ sigset_t p_sigcatch; /* Signals being caught by user. */ u_char p_priority; /* Process priority. */ u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ char p_nice; /* Process "nice" value. */ char p_comm[MAXCOMLEN+1]; struct pgrp *p_pgrp; /* Pointer to process group. */ struct sysentvec *p_sysent; /* System call dispatch information. */ struct rtprio p_rtprio; /* Realtime priority. */ /* End area that is copied on creation. */ #define p_endcopy p_addr struct user *p_addr; /* Kernel virtual addr of u-area (PROC ONLY). */ struct mdproc p_md; /* Any machine-dependent fields. */ u_short p_xstat; /* Exit status for wait; also stop signal. */ u_short p_acflag; /* Accounting flags. */ struct rusage *p_ru; /* Exit information. 
XXX */ int p_nthreads; /* number of threads (only in leader) */ void *p_aioinfo; /* ASYNC I/O info */ int p_wakeup; /* thread id */ struct proc *p_peers; struct proc *p_leader; }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id /* Status values. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ /* These flags are kept in p_flags. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. */ #define P_INMEM 0x00004 /* Loaded into memory. */ #define P_NOCLDSTOP 0x00008 /* No SIGCHLD when children stop. */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00020 /* Has started profiling. */ #define P_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */ #define P_SINTR 0x00080 /* Sleep is interruptible. */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_TIMEOUT 0x00400 /* Timing out during sleep. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Debugging process has waited for child. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ /* Should probably be changed into a hold count. */ #define P_NOSWAP 0x08000 /* Another flag to prevent swap out. */ #define P_PHYSIO 0x10000 /* Doing physical I/O. */ /* Should be moved to machine-dependent areas. */ #define P_OWEUPC 0x20000 /* Owe process an addupc() call at next ast. */ #define P_SWAPPING 0x40000 /* Process is being swapped. */ #define P_SWAPINREQ 0x80000 /* Swapin request due to wakeup */ /* Marked a kernel thread */ #define P_KTHREADP 0x200000 /* Process is really a kernel thread */ #define P_NOCLDWAIT 0x400000 /* No zombies if child dies */ /* * MOVE TO ucred.h? * * Shareable process credentials (always resident). This includes a reference * to the current user credentials as well as real and saved ids that may be * used to change ids. */ struct pcred { struct ucred *pc_ucred; /* Current credentials. */ uid_t p_ruid; /* Real user id. */ uid_t p_svuid; /* Saved effective user id. */ gid_t p_rgid; /* Real group id. */ gid_t p_svgid; /* Saved effective group id. */ int p_refcnt; /* Number of references. */ }; #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, * as it is used to represent "no process group". 
*/ #define PID_MAX 30000 #define NO_PID 30001 #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) #define SESSHOLD(s) ((s)->s_count++) #define SESSRELE(s) { \ if (--(s)->s_count == 0) \ FREE(s, M_SESSION); \ } extern void stopevent(struct proc*, unsigned int, unsigned int); #define STOPEVENT(p,e,v) do { \ if ((p)->p_stops & (e)) stopevent(p,e,v); } while (0) /* hold process U-area in memory, normally for ptrace/procfs work */ #define PHOLD(p) { \ if ((p)->p_lock++ == 0 && ((p)->p_flag & P_INMEM) == 0) \ faultin(p); \ } #define PRELE(p) (--(p)->p_lock) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct proc *curproc; /* Current running proc. */ extern struct proc proc0; /* Process slot for swapper. */ extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern struct timeval switchtime; /* Uptime at last context switch */ LIST_HEAD(proclist, proc); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ #define NQS 32 /* 32 run queues. */ extern struct prochd qs[]; extern struct prochd rtqs[]; extern struct prochd idqs[]; extern int whichqs; /* Bit mask summary of non-empty Q's. */ extern int whichrtqs; /* Bit mask summary of non-empty Q's. */ extern int whichidqs; /* Bit mask summary of non-empty Q's. */ struct prochd { struct proc *ph_link; /* Linked list of running processes. */ struct proc *ph_rlink; }; struct proc *pfind __P((pid_t)); /* Find process by id. */ struct pgrp *pgfind __P((pid_t)); /* Find process group by id. */ struct vm_zone; extern struct vm_zone *proc_zone; int chgproccnt __P((uid_t uid, int diff)); int enterpgrp __P((struct proc *p, pid_t pgid, int mksess)); void fixjobc __P((struct proc *p, struct pgrp *pgrp, int entering)); int inferior __P((struct proc *p)); int leavepgrp __P((struct proc *p)); void mi_switch __P((void)); void procinit __P((void)); void resetpriority __P((struct proc *)); int roundrobin_interval __P((void)); void setrunnable __P((struct proc *)); void setrunqueue __P((struct proc *)); void sleepinit __P((void)); void remrq __P((struct proc *)); void cpu_switch __P((struct proc *)); void unsleep __P((struct proc *)); void wakeup_one __P((void *chan)); void cpu_exit __P((struct proc *)) __dead2; void exit1 __P((struct proc *, int)) __dead2; void cpu_fork __P((struct proc *, struct proc *)); int fork1 __P((struct proc *, int)); int trace_req __P((struct proc *)); void cpu_wait __P((struct proc *)); int cpu_coredump __P((struct proc *, struct vnode *, struct ucred *)); void setsugid __P((struct proc *p)); #endif /* KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: head/sys/sys/signalvar.h =================================================================== --- head/sys/sys/signalvar.h (revision 41085) +++ head/sys/sys/signalvar.h (revision 41086) @@ -1,178 +1,180 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)signalvar.h 8.6 (Berkeley) 2/19/95 - * $Id: signalvar.h,v 1.18 1998/03/28 10:33:23 bde Exp $ + * $Id: signalvar.h,v 1.19 1998/09/14 05:36:51 jdp Exp $ */ #ifndef _SYS_SIGNALVAR_H_ /* tmp for user.h */ #define _SYS_SIGNALVAR_H_ #include /* * Kernel signal definitions and data structures, * not exported to user programs. */ /* * Process signal actions and state, needed only within the process * (not necessarily resident). */ struct sigacts { sig_t ps_sigact[NSIG]; /* disposition of signals */ sigset_t ps_catchmask[NSIG]; /* signals to be blocked */ sigset_t ps_sigonstack; /* signals to take on sigstack */ sigset_t ps_sigintr; /* signals that interrupt syscalls */ sigset_t ps_sigreset; /* signals that reset when caught */ sigset_t ps_signodefer; /* signals not masked while handled */ sigset_t ps_oldmask; /* saved mask from before sigpause */ int ps_flags; /* signal flags, below */ struct sigaltstack ps_sigstk; /* sp & on stack state variable */ int ps_sig; /* for core dump/debugger XXX */ u_long ps_code; /* for core dump/debugger XXX */ sigset_t ps_usertramp; /* SunOS compat; libc sigtramp XXX */ }; /* signal flags */ #define SAS_OLDMASK 0x01 /* need to restore mask before pause */ #define SAS_ALTSTACK 0x02 /* have alternate signal stack */ /* additional signal action values, used only temporarily/internally */ #define SIG_CATCH ((__sighandler_t *)2) #define SIG_HOLD ((__sighandler_t *)3) /* * get signal action for process and signal; currently only for current process */ #define SIGACTION(p, sig) (p->p_sigacts->ps_sigact[(sig)]) /* * Determine signal that should be delivered to process p, the current * process, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). */ #define CURSIG(p) \ (((p)->p_siglist == 0 || \ (((p)->p_flag & P_TRACED) == 0 && \ ((p)->p_siglist & ~(p)->p_sigmask) == 0)) ? \ 0 : issignal(p)) /* * Clear a pending signal from a process. 
*/ #define CLRSIG(p, sig) { (p)->p_siglist &= ~sigmask(sig); } /* * Signal properties and actions. * The array below categorizes the signals and their default actions * according to the following properties: */ #define SA_KILL 0x01 /* terminates process by default */ #define SA_CORE 0x02 /* ditto and coredumps */ #define SA_STOP 0x04 /* suspend process */ #define SA_TTYSTOP 0x08 /* ditto, from tty */ #define SA_IGNORE 0x10 /* ignore by default */ #define SA_CONT 0x20 /* continue if suspended */ #define SA_CANTMASK 0x40 /* non-maskable, catchable */ #ifdef SIGPROP static int sigprop[NSIG + 1] = { 0, /* unused */ SA_KILL, /* SIGHUP */ SA_KILL, /* SIGINT */ SA_KILL|SA_CORE, /* SIGQUIT */ SA_KILL|SA_CORE, /* SIGILL */ SA_KILL|SA_CORE, /* SIGTRAP */ SA_KILL|SA_CORE, /* SIGABRT */ SA_KILL|SA_CORE, /* SIGEMT */ SA_KILL|SA_CORE, /* SIGFPE */ SA_KILL, /* SIGKILL */ SA_KILL|SA_CORE, /* SIGBUS */ SA_KILL|SA_CORE, /* SIGSEGV */ SA_KILL|SA_CORE, /* SIGSYS */ SA_KILL, /* SIGPIPE */ SA_KILL, /* SIGALRM */ SA_KILL, /* SIGTERM */ SA_IGNORE, /* SIGURG */ SA_STOP, /* SIGSTOP */ SA_STOP|SA_TTYSTOP, /* SIGTSTP */ SA_IGNORE|SA_CONT, /* SIGCONT */ SA_IGNORE, /* SIGCHLD */ SA_STOP|SA_TTYSTOP, /* SIGTTIN */ SA_STOP|SA_TTYSTOP, /* SIGTTOU */ SA_IGNORE, /* SIGIO */ SA_KILL, /* SIGXCPU */ SA_KILL, /* SIGXFSZ */ SA_KILL, /* SIGVTALRM */ SA_KILL, /* SIGPROF */ SA_IGNORE, /* SIGWINCH */ SA_IGNORE, /* SIGINFO */ SA_KILL, /* SIGUSR1 */ SA_KILL, /* SIGUSR2 */ }; #define contsigmask (sigmask(SIGCONT)) #define stopsigmask (sigmask(SIGSTOP) | sigmask(SIGTSTP) | \ sigmask(SIGTTIN) | sigmask(SIGTTOU)) #endif /* SIGPROP */ #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) #ifdef KERNEL struct pgrp; struct proc; +struct sigio; extern int sugid_coredump; /* Sysctl variable kern.sugid_coredump */ /* * Machine-independent functions: */ void execsigs __P((struct proc *p)); char *expand_name __P((const char*, int, int)); void gsignal __P((int pgid, int sig)); int issignal __P((struct proc *p)); void killproc __P((struct proc *p, char *why)); +void pgsigio __P((struct sigio *, int signum, int checkctty)); void pgsignal __P((struct pgrp *pgrp, int sig, int checkctty)); void postsig __P((int sig)); void psignal __P((struct proc *p, int sig)); void sigexit __P((struct proc *p, int signum)); void siginit __P((struct proc *p)); void trapsignal __P((struct proc *p, int sig, u_long code)); /* * Machine-dependent functions: */ void sendsig __P((sig_t action, int sig, int returnmask, u_long code)); #endif /* KERNEL */ #endif /* !_SYS_SIGNALVAR_H_ */ Index: head/sys/sys/socketvar.h =================================================================== --- head/sys/sys/socketvar.h (revision 41085) +++ head/sys/sys/socketvar.h (revision 41086) @@ -1,364 +1,364 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 - * $Id: socketvar.h,v 1.29 1998/08/23 03:07:17 wollman Exp $ + * $Id: socketvar.h,v 1.30 1998/11/05 14:28:25 dg Exp $ */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ #include /* for TAILQ macros */ #include /* for struct selinfo */ /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ typedef u_quad_t so_gen_t; struct socket { struct vm_zone *so_zone; /* zone we were allocated from */ short so_type; /* generic type, see socket.h */ short so_options; /* from socket call, see socket.h */ short so_linger; /* time to linger while closing */ short so_state; /* internal state flags SS_*, below */ caddr_t so_pcb; /* protocol control block */ struct protosw *so_proto; /* protocol handle */ /* * Variables for connection queuing. * Socket where accepts occur is so_head in all subsidiary sockets. * If so_head is 0, socket is not related to an accept. * For head socket so_q0 queues partially completed connections, * while so_q is a queue of connections ready to be accepted. * If a connection is aborted and it has so_head set, then * it has to be pulled out of either so_q0 or so_q. * We allow connections to queue up based on current queue lengths * and limit on number of queued connections for this socket. */ struct socket *so_head; /* back pointer to accept socket */ TAILQ_HEAD(, socket) so_incomp; /* queue of partial unaccepted connections */ TAILQ_HEAD(, socket) so_comp; /* queue of complete unaccepted connections */ TAILQ_ENTRY(socket) so_list; /* list of unaccepted connections */ short so_qlen; /* number of unaccepted connections */ short so_incqlen; /* number of unaccepted incomplete connections */ short so_qlimit; /* max number queued connections */ short so_timeo; /* connection timeout */ u_short so_error; /* error affecting connection */ - pid_t so_pgid; /* pgid for signals */ + struct sigio *so_sigio; /* information for SIGIO/SIGURG */ u_long so_oobmark; /* chars to oob mark */ /* * Variables for socket buffering. 
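
so_pgid becoming so_sigio means sockets now share the same ownership machinery as the character devices above, for both SIGIO (readiness) and SIGURG (TCP urgent data). Since fcntl(F_SETOWN) and ioctl(FIOSETOWN) both funnel into fsetown(), either interface works; a small self-contained demo over a socketpair:

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static volatile sig_atomic_t got_sigio;

	static void
	handler(int sig)
	{
		got_sigio = 1;
	}

	int
	main(void)
	{
		int sv[2];
		char c;
		sigset_t mask, empty;

		if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
			perror("socketpair");
			return (1);
		}
		signal(SIGIO, handler);

		/* Block SIGIO so the sigsuspend() loop cannot miss it. */
		sigemptyset(&mask);
		sigaddset(&mask, SIGIO);
		sigprocmask(SIG_BLOCK, &mask, NULL);
		sigemptyset(&empty);

		fcntl(sv[0], F_SETOWN, getpid());	/* same fsetown() path */
		fcntl(sv[0], F_SETFL, O_ASYNC);		/* SS_ASYNC on the socket */

		write(sv[1], "x", 1);			/* readable -> SIGIO */
		while (!got_sigio)
			sigsuspend(&empty);
		read(sv[0], &c, 1);
		printf("SIGIO delivered\n");
		return (0);
	}

Note that the externalized struct xsocket below still carries a plain pid_t so_pgid, so the sysctl view of a socket keeps its old shape even though the kernel side now holds a struct sigio pointer.
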
*/ struct sockbuf { u_long sb_cc; /* actual chars in buffer */ u_long sb_hiwat; /* max actual char count */ u_long sb_mbcnt; /* chars of mbufs used */ u_long sb_mbmax; /* max chars of mbufs to use */ long sb_lowat; /* low water mark */ struct mbuf *sb_mb; /* the mbuf chain */ struct selinfo sb_sel; /* process selecting read/write */ short sb_flags; /* flags, see below */ short sb_timeo; /* timeout for read/write */ } so_rcv, so_snd; #define SB_MAX (256*1024) /* default for max chars in sockbuf */ #define SB_LOCK 0x01 /* lock on data queue */ #define SB_WANT 0x02 /* someone is waiting to lock */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ #define SB_NOINTR 0x40 /* operations not interruptible */ void (*so_upcall) __P((struct socket *, void *, int)); void *so_upcallarg; uid_t so_uid; /* who opened the socket */ /* NB: generation count must not be first; easiest to make it last. */ so_gen_t so_gencnt; /* generation count */ }; /* * Socket state bits. */ #define SS_NOFDREF 0x0001 /* no file table ref any more */ #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ #define SS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SS_RCVATMARK 0x0040 /* at mark on input */ #define SS_NBIO 0x0100 /* non-blocking ops */ #define SS_ASYNC 0x0200 /* async i/o notify */ #define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ #define SS_INCOMP 0x0800 /* unaccepted, incomplete connection */ #define SS_COMP 0x1000 /* unaccepted, complete connection */ /* * Externalized form of struct socket used by the sysctl(3) interface. */ struct xsocket { size_t xso_len; /* length of this structure */ struct socket *xso_so; /* makes a convenient handle sometimes */ short so_type; short so_options; short so_linger; short so_state; caddr_t so_pcb; /* another convenient handle */ int xso_protocol; int xso_family; short so_qlen; short so_incqlen; short so_qlimit; short so_timeo; u_short so_error; pid_t so_pgid; u_long so_oobmark; struct xsockbuf { u_long sb_cc; u_long sb_hiwat; u_long sb_mbcnt; u_long sb_mbmax; long sb_lowat; short sb_flags; short sb_timeo; } so_rcv, so_snd; uid_t so_uid; /* XXX */ }; /* * Macros for sockets and socket buffering. */ /* * Do we need to notify the other side when I/O is possible? */ #define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL)) != 0) /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (cc > hiwat or mbcnt > mbmax). Should detect * overflow and return 0. Should use "lmin" but it doesn't exist now. */ #define sbspace(sb) \ ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \ (int)((sb)->sb_mbmax - (sb)->sb_mbcnt))) /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) /* can we read something from so? */ #define soreadable(so) \ ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ ((so)->so_state & SS_CANTRCVMORE) || \ (so)->so_comp.tqh_first || (so)->so_error) /* can we write something to so? 
*/ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_state & SS_CANTSENDMORE) || \ (so)->so_error) /* adjust counters in sb reflecting allocation of m */ #define sballoc(sb, m) { \ (sb)->sb_cc += (m)->m_len; \ (sb)->sb_mbcnt += MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ } /* adjust counters in sb reflecting freeing of m */ #define sbfree(sb, m) { \ (sb)->sb_cc -= (m)->m_len; \ (sb)->sb_mbcnt -= MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ } /* * Set lock on sockbuf sb; sleep if lock is already held. * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. * Returns error without lock if sleep is interrupted. */ #define sblock(sb, wf) ((sb)->sb_flags & SB_LOCK ? \ (((wf) == M_WAITOK) ? sb_lock(sb) : EWOULDBLOCK) : \ ((sb)->sb_flags |= SB_LOCK), 0) /* release lock on sockbuf sb */ #define sbunlock(sb) { \ (sb)->sb_flags &= ~SB_LOCK; \ if ((sb)->sb_flags & SB_WANT) { \ (sb)->sb_flags &= ~SB_WANT; \ wakeup((caddr_t)&(sb)->sb_flags); \ } \ } #define sorwakeup(so) do { \ if (sb_notify(&(so)->so_rcv)) \ sowakeup((so), &(so)->so_rcv); \ } while (0) #define sowwakeup(so) do { \ if (sb_notify(&(so)->so_snd)) \ sowakeup((so), &(so)->so_snd); \ } while (0) #ifdef KERNEL /* * Argument structure for sosetopt et seq. This is in the KERNEL * section because it will never be visible to user code. */ enum sopt_dir { SOPT_GET, SOPT_SET }; struct sockopt { enum sopt_dir sopt_dir; /* is this a get or a set? */ int sopt_level; /* second arg of [gs]etsockopt */ int sopt_name; /* third arg of [gs]etsockopt */ void *sopt_val; /* fourth arg of [gs]etsockopt */ size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ struct proc *sopt_p; /* calling process or null if kernel */ }; struct sf_buf { SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */ int refcnt; /* reference count */ struct vm_page *m; /* currently mapped page */ vm_offset_t kva; /* va of mapping */ }; #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_SONAME); #endif extern int maxsockets; extern u_long sb_max; extern struct vm_zone *socket_zone; extern so_gen_t so_gencnt; struct file; struct filedesc; struct mbuf; struct sockaddr; struct stat; struct ucred; struct uio; /* * File operations on sockets. 
*/ int soo_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); int soo_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); int soo_stat __P((struct socket *so, struct stat *ub)); /* * From uipc_socket and friends */ struct sockaddr *dup_sockaddr __P((struct sockaddr *sa, int canwait)); int getsock __P((struct filedesc *fdp, int fdes, struct file **fpp)); int sockargs __P((struct mbuf **mp, caddr_t buf, int buflen, int type)); int getsockaddr __P((struct sockaddr **namp, caddr_t uaddr, size_t len)); void sbappend __P((struct sockbuf *sb, struct mbuf *m)); int sbappendaddr __P((struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)); int sbappendcontrol __P((struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)); void sbappendrecord __P((struct sockbuf *sb, struct mbuf *m0)); void sbcheck __P((struct sockbuf *sb)); void sbcompress __P((struct sockbuf *sb, struct mbuf *m, struct mbuf *n)); struct mbuf * sbcreatecontrol __P((caddr_t p, int size, int type, int level)); void sbdrop __P((struct sockbuf *sb, int len)); void sbdroprecord __P((struct sockbuf *sb)); void sbflush __P((struct sockbuf *sb)); void sbinsertoob __P((struct sockbuf *sb, struct mbuf *m0)); void sbrelease __P((struct sockbuf *sb)); int sbreserve __P((struct sockbuf *sb, u_long cc)); void sbtoxsockbuf __P((struct sockbuf *sb, struct xsockbuf *xsb)); int sbwait __P((struct sockbuf *sb)); int sb_lock __P((struct sockbuf *sb)); int soabort __P((struct socket *so)); int soaccept __P((struct socket *so, struct sockaddr **nam)); struct socket *soalloc __P((int waitok)); int sobind __P((struct socket *so, struct sockaddr *nam, struct proc *p)); void socantrcvmore __P((struct socket *so)); void socantsendmore __P((struct socket *so)); int soclose __P((struct socket *so)); int soconnect __P((struct socket *so, struct sockaddr *nam, struct proc *p)); int soconnect2 __P((struct socket *so1, struct socket *so2)); int socreate __P((int dom, struct socket **aso, int type, int proto, struct proc *p)); void sodealloc __P((struct socket *so)); int sodisconnect __P((struct socket *so)); void sofree __P((struct socket *so)); int sogetopt __P((struct socket *so, struct sockopt *sopt)); void sohasoutofband __P((struct socket *so)); void soisconnected __P((struct socket *so)); void soisconnecting __P((struct socket *so)); void soisdisconnected __P((struct socket *so)); void soisdisconnecting __P((struct socket *so)); int solisten __P((struct socket *so, int backlog, struct proc *p)); struct socket * sodropablereq __P((struct socket *head)); struct socket * sonewconn __P((struct socket *head, int connstatus)); int sooptcopyin __P((struct sockopt *sopt, void *buf, size_t len, size_t minlen)); int sooptcopyout __P((struct sockopt *sopt, void *buf, size_t len)); int sopoll __P((struct socket *so, int events, struct ucred *cred, struct proc *p)); int soreceive __P((struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)); int soreserve __P((struct socket *so, u_long sndcc, u_long rcvcc)); void sorflush __P((struct socket *so)); int sosend __P((struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct proc *p)); int sosetopt __P((struct socket *so, struct sockopt *sopt)); int soshutdown __P((struct socket *so, int how)); void sotoxsocket __P((struct socket *so, struct xsocket *xso)); void sowakeup __P((struct socket *so, struct sockbuf *sb)); #endif /* KERNEL */ 
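/*
 * Illustrative sketch (reviewer-added; not part of this diff).  With
 * so_pgid replaced by so_sigio above, asynchronous-I/O notification on a
 * socket can be funnelled through pgsigio(), whose prototype this commit
 * adds to signalvar.h.  The function below is a hedged approximation of
 * how a socket-buffer wakeup might use the new field; the name
 * sowakeup_sketch and its exact body are assumptions, not the committed
 * sowakeup().  Assumes the usual kernel headers (<sys/param.h>,
 * <sys/signalvar.h>, <sys/socketvar.h>, <sys/select.h>, <sys/mbuf.h>).
 */
static void
sowakeup_sketch(struct socket *so, struct sockbuf *sb)
{
	selwakeup(&sb->sb_sel);			/* wake select()ers */
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;	/* wake sbwait()ers */
		wakeup((caddr_t)&sb->sb_cc);
	}
	/*
	 * SS_ASYNC is set via FIOASYNC; so_sigio records the F_SETOWN
	 * target.  pgsigio() signals a process or process group; the
	 * third argument is 0 because sockets, unlike ttys, do not
	 * require a controlling-terminal check.
	 */
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGIO, 0);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}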
#endif /* !_SYS_SOCKETVAR_H_ */ Index: head/sys/sys/tty.h =================================================================== --- head/sys/sys/tty.h (revision 41085) +++ head/sys/sys/tty.h (revision 41086) @@ -1,273 +1,274 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tty.h 8.6 (Berkeley) 1/21/94 - * $Id: tty.h,v 1.41 1998/03/07 15:36:25 bde Exp $ + * $Id: tty.h,v 1.42 1998/06/07 17:13:04 dfr Exp $ */ #ifndef _SYS_TTY_H_ #define _SYS_TTY_H_ #include #include /* For struct selinfo. */ /* * Clists are character lists, which is a variable length linked list * of cblocks, with a count of the number of characters in the list. */ struct clist { int c_cc; /* Number of characters in the clist. */ int c_cbcount; /* Number of cblocks. */ int c_cbmax; /* Max # cblocks allowed for this clist. */ int c_cbreserved; /* # cblocks reserved for this clist. */ char *c_cf; /* Pointer to the first cblock. */ char *c_cl; /* Pointer to the last cblock. */ }; /* * Per-tty structure. * * Should be split in two, into device and tty drivers. * Glue could be masks of what to echo and circular buffer * (low, high, timeout). */ struct tty { struct clist t_rawq; /* Device raw input queue. */ long t_rawcc; /* Raw input queue statistics. */ struct clist t_canq; /* Device canonical queue. */ long t_cancc; /* Canonical queue statistics. */ struct clist t_outq; /* Device output queue. */ long t_outcc; /* Output queue statistics. */ int t_line; /* Interface to device drivers. */ dev_t t_dev; /* Device. 
*/ int t_state; /* Device and driver (TS*) state. */ int t_flags; /* Tty flags. */ int t_timeout; /* Timeout for ttywait() */ struct pgrp *t_pgrp; /* Foreground process group. */ struct session *t_session; /* Enclosing session. */ + struct sigio *t_sigio; /* information for SIGIO */ struct selinfo t_rsel; /* Tty read/oob select. */ struct selinfo t_wsel; /* Tty write select. */ struct termios t_termios; /* Termios state. */ struct winsize t_winsize; /* Window size. */ /* Start output. */ void (*t_oproc) __P((struct tty *)); /* Stop output. */ void (*t_stop) __P((struct tty *, int)); /* Set hardware state. */ int (*t_param) __P((struct tty *, struct termios *)); void *t_sc; /* XXX: net/if_sl.c:sl_softc. */ int t_column; /* Tty output column. */ int t_rocount, t_rocol; /* Tty. */ int t_ififosize; /* Total size of upstream fifos. */ int t_ihiwat; /* High water mark for input. */ int t_ilowat; /* Low water mark for input. */ speed_t t_ispeedwat; /* t_ispeed override for watermarks. */ int t_ohiwat; /* High water mark for output. */ int t_olowat; /* Low water mark for output. */ speed_t t_ospeedwat; /* t_ospeed override for watermarks. */ int t_gen; /* Generation number. */ }; #define t_cc t_termios.c_cc #define t_cflag t_termios.c_cflag #define t_iflag t_termios.c_iflag #define t_ispeed t_termios.c_ispeed #define t_lflag t_termios.c_lflag #define t_min t_termios.c_min #define t_oflag t_termios.c_oflag #define t_ospeed t_termios.c_ospeed #define t_time t_termios.c_time #define TTIPRI 25 /* Sleep priority for tty reads. */ #define TTOPRI 26 /* Sleep priority for tty writes. */ /* * User data unfortunately has to be copied through buffers on the way to * and from clists. The buffers are on the stack so their sizes must be * fairly small. */ #define IBUFSIZ 384 /* Should be >= max value of MIN. */ #define OBUFSIZ 100 #ifndef TTYHOG #define TTYHOG 1024 #endif #ifdef KERNEL #define TTMAXHIWAT roundup(2048, CBSIZE) #define TTMINHIWAT roundup(100, CBSIZE) #define TTMAXLOWAT 256 #define TTMINLOWAT 32 #endif /* These flags are kept in t_state. */ #define TS_SO_OLOWAT 0x00001 /* Wake up when output <= low water. */ #define TS_ASYNC 0x00002 /* Tty in async I/O mode. */ #define TS_BUSY 0x00004 /* Draining output. */ #define TS_CARR_ON 0x00008 /* Carrier is present. */ #define TS_FLUSH 0x00010 /* Outq has been flushed during DMA. */ #define TS_ISOPEN 0x00020 /* Open has completed. */ #define TS_TBLOCK 0x00040 /* Further input blocked. */ #define TS_TIMEOUT 0x00080 /* Wait for output char processing. */ #define TS_TTSTOP 0x00100 /* Output paused. */ #ifdef notyet #define TS_WOPEN 0x00200 /* Open in progress. */ #endif #define TS_XCLUDE 0x00400 /* Tty requires exclusivity. */ /* State for intra-line fancy editing work. */ #define TS_BKSL 0x00800 /* State for lowercase \ work. */ #define TS_CNTTB 0x01000 /* Counting tab width, ignore FLUSHO. */ #define TS_ERASE 0x02000 /* Within a \.../ for PRTRUB. */ #define TS_LNCH 0x04000 /* Next character is literal. */ #define TS_TYPEN 0x08000 /* Retyping suspended input (PENDIN). */ #define TS_LOCAL (TS_BKSL | TS_CNTTB | TS_ERASE | TS_LNCH | TS_TYPEN) /* Extras. */ #define TS_CAN_BYPASS_L_RINT 0x010000 /* Device in "raw" mode. */ #define TS_CONNECTED 0x020000 /* Connection open. */ #define TS_SNOOP 0x040000 /* Device is being snooped on. */ #define TS_SO_OCOMPLETE 0x080000 /* Wake up when output completes. */ #define TS_ZOMBIE 0x100000 /* Connection lost. */ /* Hardware flow-control-invoked bits. */ #define TS_CAR_OFLOW 0x200000 /* For MDMBUF (XXX handle in driver). 
*/ #ifdef notyet #define TS_CTS_OFLOW 0x400000 /* For CCTS_OFLOW. */ #define TS_DSR_OFLOW 0x800000 /* For CDSR_OFLOW. */ #endif /* Character type information. */ #define ORDINARY 0 #define CONTROL 1 #define BACKSPACE 2 #define NEWLINE 3 #define TAB 4 #define VTAB 5 #define RETURN 6 struct speedtab { int sp_speed; /* Speed. */ int sp_code; /* Code. */ }; /* Modem control commands (driver). */ #define DMSET 0 #define DMBIS 1 #define DMBIC 2 #define DMGET 3 /* Flags on a character passed to ttyinput. */ #define TTY_CHARMASK 0x000000ff /* Character mask */ #define TTY_QUOTE 0x00000100 /* Character quoted */ #define TTY_ERRORMASK 0xff000000 /* Error mask */ #define TTY_FE 0x01000000 /* Framing error */ #define TTY_PE 0x02000000 /* Parity error */ #define TTY_OE 0x04000000 /* Overrun error */ #define TTY_BI 0x08000000 /* Break condition */ /* Is tp controlling terminal for p? */ #define isctty(p, tp) \ ((p)->p_session == (tp)->t_session && (p)->p_flag & P_CONTROLT) /* Is p in background of tp? */ #define isbackground(p, tp) \ (isctty((p), (tp)) && (p)->p_pgrp != (tp)->t_pgrp) /* Unique sleep addresses. */ #define TSA_CARR_ON(tp) ((void *)&(tp)->t_rawq) #define TSA_HUP_OR_INPUT(tp) ((void *)&(tp)->t_rawq.c_cf) #define TSA_OCOMPLETE(tp) ((void *)&(tp)->t_outq.c_cl) #define TSA_OLOWAT(tp) ((void *)&(tp)->t_outq) #define TSA_PTC_READ(tp) ((void *)&(tp)->t_outq.c_cf) #define TSA_PTC_WRITE(tp) ((void *)&(tp)->t_rawq.c_cl) #define TSA_PTS_READ(tp) ((void *)&(tp)->t_canq) #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_TTYS); #endif extern struct tty *constty; /* Temporary virtual console. */ int b_to_q __P((char *cp, int cc, struct clist *q)); void catq __P((struct clist *from, struct clist *to)); void clist_alloc_cblocks __P((struct clist *q, int ccmax, int ccres)); void clist_free_cblocks __P((struct clist *q)); /* void clist_init __P((void)); */ /* defined in systm.h for main() */ int getc __P((struct clist *q)); void ndflush __P((struct clist *q, int cc)); int ndqb __P((struct clist *q, int flag)); char *nextc __P((struct clist *q, char *cp, int *c)); int putc __P((int c, struct clist *q)); int q_to_b __P((struct clist *q, char *cp, int cc)); int unputc __P((struct clist *q)); int ttcompat __P((struct tty *tp, u_long com, caddr_t data, int flag)); int ttsetcompat __P((struct tty *tp, u_long *com, caddr_t data, struct termios *term)); void termioschars __P((struct termios *t)); int tputchar __P((int c, struct tty *tp)); int ttioctl __P((struct tty *tp, u_long com, void *data, int flag)); int ttread __P((struct tty *tp, struct uio *uio, int flag)); void ttrstrt __P((void *tp)); int ttypoll __P((struct tty *tp, int events, struct proc *p)); int ttpoll __P((dev_t dev, int events, struct proc *p)); void ttsetwater __P((struct tty *tp)); int ttspeedtab __P((int speed, struct speedtab *table)); int ttstart __P((struct tty *tp)); void ttwakeup __P((struct tty *tp)); int ttwrite __P((struct tty *tp, struct uio *uio, int flag)); void ttwwakeup __P((struct tty *tp)); void ttyblock __P((struct tty *tp)); void ttychars __P((struct tty *tp)); int ttycheckoutq __P((struct tty *tp, int wait)); int ttyclose __P((struct tty *tp)); void ttyflush __P((struct tty *tp, int rw)); void ttyinfo __P((struct tty *tp)); int ttyinput __P((int c, struct tty *tp)); int ttylclose __P((struct tty *tp, int flag)); int ttymodem __P((struct tty *tp, int flag)); int ttyopen __P((dev_t device, struct tty *tp)); int ttysleep __P((struct tty *tp, void *chan, int pri, char *wmesg, int timeout)); int ttywait __P((struct tty *tp)); 
struct tty *ttymalloc __P((void)); void ttyfree __P((struct tty *)); #endif /* KERNEL */ #endif /* !_SYS_TTY_H_ */
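/*
 * Illustrative sketches (reviewer-added; not part of this diff).
 *
 * 1) Kernel side: with t_sigio now in struct tty, a tty input wakeup can
 *    hand SIGIO delivery to pgsigio() as well.  The nonzero third
 *    argument asks pgsigio() to honour the controlling-terminal check,
 *    which ttys want and sockets do not.  The name ttwakeup_sketch and
 *    the body are hedged assumptions, not the committed ttwakeup().
 */
static void
ttwakeup_sketch(struct tty *tp)
{
	if (tp->t_rsel.si_pid != 0)
		selwakeup(&tp->t_rsel);		/* wake readers in select() */
	if ((tp->t_state & TS_ASYNC) && tp->t_sigio != NULL)
		pgsigio(tp->t_sigio, SIGIO, 1);	/* 1 => check ctty */
	wakeup(TSA_HUP_OR_INPUT(tp));		/* wake readers in ttysleep() */
}

/*
 * 2) User side: the observable effect of centralizing ownership in
 *    struct sigio is that F_SETOWN/F_GETOWN and SIGIO delivery behave
 *    uniformly across descriptor types.  A hedged user-level fragment
 *    showing the conventional setup:
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t io_ready;

static void
sigio_handler(int sig)
{
	io_ready = 1;			/* do the read() from the main loop */
}

static int
enable_async_io(int fd)
{
	int flags;

	if (signal(SIGIO, sigio_handler) == SIG_ERR)
		return (-1);
	if (fcntl(fd, F_SETOWN, getpid()) == -1)	/* deliver SIGIO to us */
		return (-1);
	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
		return (-1);
	return (fcntl(fd, F_SETFL, flags | O_ASYNC));	/* FASYNC mode */
}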