Index: head/sys/kern/kern_descrip.c =================================================================== --- head/sys/kern/kern_descrip.c (revision 41085) +++ head/sys/kern/kern_descrip.c (revision 41086) @@ -1,1208 +1,1310 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 - * $Id: kern_descrip.c,v 1.54 1998/07/15 06:10:16 bde Exp $ + * $Id: kern_descrip.c,v 1.55 1998/07/29 17:38:13 bde Exp $ */ #include "opt_compat.h" #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); MALLOC_DEFINE(M_FILE, "file", "Open file structure"); +static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); static d_open_t fdopen; #define NUMFDESC 64 #define CDEV_MAJOR 22 static struct cdevsw fildesc_cdevsw = { fdopen, noclose, noread, nowrite, noioc, nostop, nullreset, nodevtotty, seltrue, nommap, nostrat }; static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval)); /* * Descriptor management. */ struct filelist filehead; /* head of list of open files */ int nfiles; /* actual number of open files */ extern int cmask; /* * System calls on descriptors. 
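 *
 * [Illustrative sketch, not part of the committed source: a userland
 * view of the duplication primitives this section implements.  The
 * file name and helper are hypothetical; only POSIX calls are used.]
 */
#if 0	/* illustrative userland sketch */
#include <fcntl.h>
#include <unistd.h>

/*
 * Redirect stdout to a file, returning a saved copy of the old
 * stdout.  fcntl(F_DUPFD) allocates the lowest free descriptor at
 * or above its argument; dup2() re-points descriptor 1 at fd,
 * closing whatever descriptor 1 referred to before.
 */
int
redirect_stdout(const char *path)
{
	int fd, saved;

	fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0)
		return (-1);
	saved = fcntl(STDOUT_FILENO, F_DUPFD, 10);
	if (saved < 0 || dup2(fd, STDOUT_FILENO) < 0) {
		close(fd);
		return (-1);
	}
	close(fd);
	return (saved);		/* dup2(saved, 1) restores stdout */
}
#endif
/*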
*/ #ifndef _SYS_SYSPROTO_H_ struct getdtablesize_args { int dummy; }; #endif /* ARGSUSED */ int getdtablesize(p, uap) struct proc *p; struct getdtablesize_args *uap; { p->p_retval[0] = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); return (0); } /* * Duplicate a file descriptor to a particular value. */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { u_int from; u_int to; }; #endif /* ARGSUSED */ int dup2(p, uap) struct proc *p; struct dup2_args *uap; { register struct filedesc *fdp = p->p_fd; register u_int old = uap->from, new = uap->to; int i, error; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfilesperproc) return (EBADF); if (old == new) { p->p_retval[0] = new; return (0); } if (new >= fdp->fd_nfiles) { if ((error = fdalloc(p, new, &i))) return (error); if (new != i) panic("dup2: fdalloc"); } else if (fdp->fd_ofiles[new]) { if (fdp->fd_ofileflags[new] & UF_MAPPED) (void) munmapfd(p, new); /* * dup2() must succeed even if the close has an error. */ (void) closef(fdp->fd_ofiles[new], p); } return (finishdup(fdp, (int)old, (int)new, p->p_retval)); } /* * Duplicate a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct dup_args { u_int fd; }; #endif /* ARGSUSED */ int dup(p, uap) struct proc *p; struct dup_args *uap; { register struct filedesc *fdp; u_int old; int new, error; old = uap->fd; #if 0 /* * XXX Compatibility */ if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); } #endif fdp = p->p_fd; if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) return (EBADF); if ((error = fdalloc(p, 0, &new))) return (error); return (finishdup(fdp, (int)old, new, p->p_retval)); } /* * The file control system call. */ #ifndef _SYS_SYSPROTO_H_ struct fcntl_args { int fd; int cmd; long arg; }; #endif /* ARGSUSED */ int fcntl(p, uap) struct proc *p; register struct fcntl_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register char *pop; struct vnode *vp; int i, tmp, error, flg = F_POSIX; struct flock fl; u_int newmin; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); pop = &fdp->fd_ofileflags[uap->fd]; switch (uap->cmd) { case F_DUPFD: newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfilesperproc) return (EINVAL); if ((error = fdalloc(p, newmin, &i))) return (error); return (finishdup(fdp, uap->fd, i, p->p_retval)); case F_GETFD: p->p_retval[0] = *pop & 1; return (0); case F_SETFD: *pop = (*pop &~ 1) | (uap->arg & 1); return (0); case F_GETFL: p->p_retval[0] = OFLAGS(fp->f_flag); return (0); case F_SETFL: fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); if (error) return (error); tmp = fp->f_flag & FASYNC; error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); if (!error) return (0); fp->f_flag &= ~FNONBLOCK; tmp = 0; (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); return (error); case F_GETOWN: - if (fp->f_type == DTYPE_SOCKET) { - p->p_retval[0] = ((struct socket *)fp->f_data)->so_pgid; - return (0); - } error = (*fp->f_ops->fo_ioctl) - (fp, TIOCGPGRP, (caddr_t)p->p_retval, p); - p->p_retval[0] = - p->p_retval[0]; + (fp, FIOGETOWN, (caddr_t)p->p_retval, p); return (error); case F_SETOWN: - if (fp->f_type == DTYPE_SOCKET) { - ((struct socket *)fp->f_data)->so_pgid = uap->arg; - return (0); - } - if (uap->arg <= 0) { - uap->arg = 
-uap->arg; - } else { - struct proc *p1 = pfind(uap->arg); - if (p1 == 0) - return (ESRCH); - uap->arg = p1->p_pgrp->pg_id; - } return ((*fp->f_ops->fo_ioctl) - (fp, TIOCSPGRP, (caddr_t)&uap->arg, p)); + (fp, FIOSETOWN, (caddr_t)&uap->arg, p)); case F_SETLKW: flg |= F_WAIT; /* Fall into F_SETLK */ case F_SETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); if (error) return (error); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; switch (fl.l_type) { case F_RDLCK: if ((fp->f_flag & FREAD) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg)); case F_WRLCK: if ((fp->f_flag & FWRITE) == 0) return (EBADF); p->p_flag |= P_ADVLOCK; return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg)); case F_UNLCK: return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl, F_POSIX)); default: return (EINVAL); } case F_GETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); if (error) return (error); if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK && fl.l_type != F_UNLCK) return (EINVAL); if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX))) return (error); return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg, sizeof(fl))); default: return (EINVAL); } /* NOTREACHED */ } /* * Common code for dup, dup2, and fcntl(F_DUPFD). */ static int finishdup(fdp, old, new, retval) register struct filedesc *fdp; register int old, new; register_t *retval; { register struct file *fp; fp = fdp->fd_ofiles[old]; fdp->fd_ofiles[new] = fp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; fp->f_count++; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; return (0); +} + +/* + * If sigio is on the list associated with a process or process group, + * disable signalling from the device, remove sigio from the list and + * free sigio. + */ +void +funsetown(sigio) + struct sigio *sigio; +{ + int s; + + if (sigio == NULL) + return; + s = splhigh(); + *(sigio->sio_myref) = NULL; + splx(s); + if (sigio->sio_pgid < 0) { + SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, + sigio, sio_pgsigio); + } else /* if ((*sigiop)->sio_pgid > 0) */ { + SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, + sigio, sio_pgsigio); + } + crfree(sigio->sio_ucred); + FREE(sigio, M_SIGIO); +} + +/* Free a list of sigio structures. */ +void +funsetownlst(sigiolst) + struct sigiolst *sigiolst; +{ + struct sigio *sigio; + + while ((sigio = sigiolst->slh_first) != NULL) + funsetown(sigio); +} + +/* + * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). + * + * After permission checking, add a sigio structure to the sigio list for + * the process or process group. + */ +int +fsetown(pgid, sigiop) + pid_t pgid; + struct sigio **sigiop; +{ + struct proc *proc = NULL; + struct pgrp *pgrp = NULL; + struct sigio *sigio; + int s; + + if (pgid == 0) { + funsetown(*sigiop); + return (0); + } else if (pgid > 0) { + proc = pfind(pgid); + if (proc == NULL) + return (ESRCH); + /* + * Policy - Don't allow a process to FSETOWN a process + * in another session. + * + * Remove this test to allow maximum flexibility or + * restrict FSETOWN to the current process or process + * group for maximum safety. 
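+ *
+ * [Illustrative sketch, not part of the committed source: the sign
+ * convention callers of F_SETOWN/FIOSETOWN use, as fsetown() decodes
+ * it below; "fd" and "pgid" are hypothetical userland variables.]
+ */
+#if 0	/* illustrative userland sketch */
+	fcntl(fd, F_SETOWN, (int)getpid());	/* deliver SIGIO to this process */
+	fcntl(fd, F_SETOWN, -pgid);		/* deliver SIGIO to group pgid */
+	fcntl(fd, F_SETOWN, 0);			/* disown: funsetown() is called */
+#endif
+/*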
+ */ + else if (proc->p_session != curproc->p_session) + return (EPERM); + } else /* if (pgid < 0) */ { + pgrp = pgfind(-pgid); + if (pgrp == NULL) + return (ESRCH); + /* + * Policy - Don't allow a process to FSETOWN a process + * in another session. + * + * Remove this test to allow maximum flexibility or + * restrict FSETOWN to the current process or process + * group for maximum safety. + */ + else if (pgrp->pg_session != curproc->p_session) + return (EPERM); + } + funsetown(*sigiop); + MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, + M_WAITOK); + if (pgid > 0) { + SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); + sigio->sio_proc = proc; + } else { + SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); + sigio->sio_pgrp = pgrp; + } + sigio->sio_pgid = pgid; + crhold(curproc->p_ucred); + sigio->sio_ucred = curproc->p_ucred; + /* It would be convenient if p_ruid was in ucred. */ + sigio->sio_ruid = curproc->p_cred->p_ruid; + sigio->sio_myref = sigiop; + s = splhigh(); + *sigiop = sigio; + splx(s); + return (0); +} + +/* + * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). + */ +pid_t +fgetown(sigio) + struct sigio *sigio; +{ + return (sigio != NULL ? sigio->sio_pgid : 0); } /* * Close a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct close_args { int fd; }; #endif /* ARGSUSED */ int close(p, uap) struct proc *p; struct close_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register int fd = uap->fd; register u_char *pf; if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); pf = (u_char *)&fdp->fd_ofileflags[fd]; if (*pf & UF_MAPPED) (void) munmapfd(p, fd); fdp->fd_ofiles[fd] = NULL; while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; *pf = 0; return (closef(fp, p)); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ofstat_args { int fd; struct ostat *sb; }; #endif /* ARGSUSED */ int ofstat(p, uap) struct proc *p; register struct ofstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct ostat oub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_FIFO: case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; default: panic("ofstat"); /*NOTREACHED*/ } cvtstat(&ub, &oub); if (error == 0) error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)); return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Return status information about a file descriptor. 
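 *
 * [Illustrative sketch, not part of the committed source: the userland
 * face of the dispatch below, which routes DTYPE_VNODE/DTYPE_FIFO to
 * vn_stat(), DTYPE_SOCKET to soo_stat() and DTYPE_PIPE to pipe_stat().]
 */
#if 0	/* illustrative userland sketch */
#include <sys/types.h>
#include <sys/stat.h>

static int
is_regular_file(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0)
		return (-1);
	return (S_ISREG(sb.st_mode) ? 1 : 0);
}
#endif
/*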
*/ #ifndef _SYS_SYSPROTO_H_ struct fstat_args { int fd; struct stat *sb; }; #endif /* ARGSUSED */ int fstat(p, uap) struct proc *p; register struct fstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_FIFO: case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; default: panic("fstat"); /*NOTREACHED*/ } if (error == 0) error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)); return (error); } /* * Return status information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct nfstat_args { int fd; struct nstat *sb; }; #endif /* ARGSUSED */ int nfstat(p, uap) struct proc *p; register struct nfstat_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct stat ub; struct nstat nub; int error; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_FIFO: case DTYPE_VNODE: error = vn_stat((struct vnode *)fp->f_data, &ub, p); break; case DTYPE_SOCKET: error = soo_stat((struct socket *)fp->f_data, &ub); break; case DTYPE_PIPE: error = pipe_stat((struct pipe *)fp->f_data, &ub); break; default: panic("fstat"); /*NOTREACHED*/ } if (error == 0) { cvtnstat(&ub, &nub); error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub)); } return (error); } /* * Return pathconf information about a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fpathconf_args { int fd; int name; }; #endif /* ARGSUSED */ int fpathconf(p, uap) struct proc *p; register struct fpathconf_args *uap; { struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); switch (fp->f_type) { case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) return (EINVAL); p->p_retval[0] = PIPE_BUF; return (0); case DTYPE_FIFO: case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; return (VOP_PATHCONF(vp, uap->name, p->p_retval)); default: panic("fpathconf"); } /*NOTREACHED*/ } /* * Allocate a file descriptor for the process. */ static int fdexpand; SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, ""); int fdalloc(p, want, result) struct proc *p; int want; int *result; { register struct filedesc *fdp = p->p_fd; register int i; int lim, last, nfiles; struct file **newofile; char *newofileflags; /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider * expanding the ofile array. */ lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); for (;;) { last = min(fdp->fd_nfiles, lim); if ((i = want) < fdp->fd_freefile) i = fdp->fd_freefile; for (; i < last; i++) { if (fdp->fd_ofiles[i] == NULL) { fdp->fd_ofileflags[i] = 0; if (i > fdp->fd_lastfile) fdp->fd_lastfile = i; if (want <= fdp->fd_freefile) fdp->fd_freefile = i; *result = i; return (0); } } /* * No space in current array. Expand? 
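 *
 * [Illustrative sketch, not part of the committed source: the growth
 * rule used below.  The table starts at NDEXTENT slots and doubles on
 * each later expansion, so repeated fdalloc() calls cost amortized
 * constant copying per descriptor:]
 */
#if 0	/* illustrative restatement of the sizing rule */
static int
next_table_size(int nfiles)
{
	return (nfiles < NDEXTENT ? NDEXTENT : 2 * nfiles);
}
#endif
/*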
*/ if (fdp->fd_nfiles >= lim) return (EMFILE); if (fdp->fd_nfiles < NDEXTENT) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays * and zero the new portion of each array. */ bcopy(fdp->fd_ofiles, newofile, (i = sizeof(struct file *) * fdp->fd_nfiles)); bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i); bcopy(fdp->fd_ofileflags, newofileflags, (i = sizeof(char) * fdp->fd_nfiles)); bzero(newofileflags + i, nfiles * sizeof(char) - i); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); fdp->fd_ofiles = newofile; fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; fdexpand++; } return (0); } /* * Check to see whether n user file descriptors * are available to the process p. */ int fdavail(p, n) struct proc *p; register int n; { register struct filedesc *fdp = p->p_fd; register struct file **fpp; register int i, lim, last; lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) return (1); last = min(fdp->fd_nfiles, lim); fpp = &fdp->fd_ofiles[fdp->fd_freefile]; for (i = last - fdp->fd_freefile; --i >= 0; fpp++) if (*fpp == NULL && --n <= 0) return (1); return (0); } /* * Create a new open file structure and allocate * a file decriptor for the process that refers to it. */ int falloc(p, resultfp, resultfd) register struct proc *p; struct file **resultfp; int *resultfd; { register struct file *fp, *fq; int error, i; if ((error = fdalloc(p, 0, &i))) return (error); if (nfiles >= maxfiles) { tablefull("file"); return (ENFILE); } /* * Allocate a new file descriptor. * If the process has file descriptor zero open, add to the list * of open files at that point, otherwise put it at the front of * the list of open files. */ nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); bzero(fp, sizeof(struct file)); if ((fq = p->p_fd->fd_ofiles[0])) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } p->p_fd->fd_ofiles[i] = fp; fp->f_count = 1; fp->f_cred = p->p_ucred; fp->f_seqcount = 1; crhold(fp->f_cred); if (resultfp) *resultfp = fp; if (resultfd) *resultfd = i; return (0); } /* * Free a file descriptor. */ void ffree(fp) register struct file *fp; { LIST_REMOVE(fp, f_list); crfree(fp->f_cred); #ifdef DIAGNOSTIC fp->f_count = 0; #endif nfiles--; FREE(fp, M_FILE); } /* * Build a new filedesc structure. */ struct filedesc * fdinit(p) struct proc *p; { register struct filedesc0 *newfdp; register struct filedesc *fdp = p->p_fd; MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bzero(newfdp, sizeof(struct filedesc0)); newfdp->fd_fd.fd_cdir = fdp->fd_cdir; VREF(newfdp->fd_fd.fd_cdir); newfdp->fd_fd.fd_rdir = fdp->fd_rdir; VREF(newfdp->fd_fd.fd_rdir); /* Create the file descriptor table. */ newfdp->fd_fd.fd_refcnt = 1; newfdp->fd_fd.fd_cmask = cmask; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_freefile = 0; newfdp->fd_fd.fd_lastfile = 0; return (&newfdp->fd_fd); } /* * Share a filedesc structure. */ struct filedesc * fdshare(p) struct proc *p; { p->p_fd->fd_refcnt++; return (p->p_fd); } /* * Copy a filedesc structure. 
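 *
 * [Illustrative sketch, not part of the committed source: fdcopy() is
 * what gives fork(2) its descriptor semantics.  The table is copied,
 * but each slot still points at the same struct file (f_count is
 * bumped), so parent and child share one file offset:]
 */
#if 0	/* illustrative userland sketch; assumes the child runs first */
#include <unistd.h>

void
shared_offset_demo(int fd)
{
	char c;

	if (fork() == 0) {
		read(fd, &c, 1);	/* child consumes byte 0 */
		_exit(0);
	}
	sleep(1);			/* crude: let the child go first */
	read(fd, &c, 1);		/* parent now sees byte 1 */
}
#endif
/*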
*/ struct filedesc * fdcopy(p) struct proc *p; { register struct filedesc *newfdp, *fdp = p->p_fd; register struct file **fpp; register int i; /* * Certain daemons might not have file descriptors */ if (fdp == NULL) return NULL; MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); bcopy(fdp, newfdp, sizeof(struct filedesc)); VREF(newfdp->fd_cdir); VREF(newfdp->fd_rdir); newfdp->fd_refcnt = 1; /* * If the number of open files fits in the internal arrays * of the open file structure, use them, otherwise allocate * additional memory for the number of descriptors currently * in use. */ if (newfdp->fd_lastfile < NDFILE) { newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; newfdp->fd_ofileflags = ((struct filedesc0 *) newfdp)->fd_dfileflags; i = NDFILE; } else { /* * Compute the smallest multiple of NDEXTENT needed * for the file descriptors currently in use, * allowing the table to shrink. */ i = newfdp->fd_nfiles; while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE, M_FILEDESC, M_WAITOK); newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; } newfdp->fd_nfiles = i; bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **)); bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char)); fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp != NULL) (*fpp)->f_count++; return (newfdp); } /* * Release a filedesc structure. */ void fdfree(p) struct proc *p; { register struct filedesc *fdp = p->p_fd; struct file **fpp; register int i; /* * Certain daemons might not have file descriptors */ if (fdp == NULL) return; if (--fdp->fd_refcnt > 0) return; fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) if (*fpp) (void) closef(*fpp, p); if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); vrele(fdp->fd_cdir); vrele(fdp->fd_rdir); FREE(fdp, M_FILEDESC); } /* * Close any files on exec? */ void fdcloseexec(p) struct proc *p; { struct filedesc *fdp = p->p_fd; struct file **fpp; char *fdfp; register int i; /* * Certain daemons might not have file descriptors */ if (fdp == NULL) return; fpp = fdp->fd_ofiles; fdfp = fdp->fd_ofileflags; for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) { if (*fdfp & UF_MAPPED) (void) munmapfd(p, i); (void) closef(*fpp, p); *fpp = NULL; *fdfp = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } /* * Internal form of close. * Decrement reference count on file structure. * Note: p may be NULL when closing a file * that was being passed in a message. */ int closef(fp, p) register struct file *fp; register struct proc *p; { struct vnode *vp; struct flock lf; int error; if (fp == NULL) return (0); /* * POSIX record locking dictates that any close releases ALL * locks owned by this process. This is handled by setting * a flag in the unlock to free ONLY locks obeying POSIX * semantics, and not to free BSD-style file locks. * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. 
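 *
 * [Illustrative sketch, not part of the committed source: the classic
 * consequence of this rule, seen from userland.  Closing ANY
 * descriptor for a file releases every POSIX lock the process holds
 * on that file; the path is hypothetical.]
 */
#if 0	/* illustrative userland sketch */
	struct flock fl;
	int fd1, fd2;

	fd1 = open("/tmp/lockfile", O_RDWR);
	fd2 = open("/tmp/lockfile", O_RDWR);	/* same file, second fd */
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;
	fcntl(fd1, F_SETLK, &fl);	/* whole-file lock via fd1 */
	close(fd2);			/* drops the lock taken via fd1! */
#endif
/*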
*/ if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX); } if (--fp->f_count > 0) return (0); if (fp->f_count < 0) panic("closef: count < 0"); if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); } if (fp->f_ops) error = (*fp->f_ops->fo_close)(fp, p); else error = 0; ffree(fp); return (error); } /* * Apply an advisory lock on a file descriptor. * * Just attempt to get a record lock of the requested type on * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0). */ #ifndef _SYS_SYSPROTO_H_ struct flock_args { int fd; int how; }; #endif /* ARGSUSED */ int flock(p, uap) struct proc *p; register struct flock_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; struct flock lf; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EOPNOTSUPP); vp = (struct vnode *)fp->f_data; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; fp->f_flag &= ~FHASLOCK; return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK)); } if (uap->how & LOCK_EX) lf.l_type = F_WRLCK; else if (uap->how & LOCK_SH) lf.l_type = F_RDLCK; else return (EBADF); fp->f_flag |= FHASLOCK; if (uap->how & LOCK_NB) return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK)); return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT)); } /* * File Descriptor pseudo-device driver (/dev/fd/). * * Opening minor device N dup()s the file (if any) connected to file * descriptor N belonging to the calling process. Note that this driver * consists of only the ``open()'' routine, because all subsequent * references to this file will be direct to the other driver. */ /* ARGSUSED */ static int fdopen(dev, mode, type, p) dev_t dev; int mode, type; struct proc *p; { /* * XXX Kludge: set curproc->p_dupfd to contain the value of the * the file descriptor being sought for duplication. The error * return ensures that the vnode for this device will be released * by vn_open. Open will detect this special error and take the * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN * will simply report the error. */ p->p_dupfd = minor(dev); return (ENODEV); } /* * Duplicate the specified descriptor to a free descriptor. */ int dupfdopen(fdp, indx, dfd, mode, error) register struct filedesc *fdp; register int indx, dfd; int mode; int error; { register struct file *wfp; struct file *fp; /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been * closed, reject. Note, check for new == old is necessary as * falloc could allocate an already closed to-be-dup'd descriptor * as the new descriptor. */ fp = fdp->fd_ofiles[indx]; if ((u_int)dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp) return (EBADF); /* * There are two cases of interest here. * * For ENODEV simply dup (dfd) to file descriptor * (indx) and return. * * For ENXIO steal away the file structure from (dfd) and * store it in (indx). (dfd) is effectively closed by * this operation. * * Any other error code is just returned. 
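 *
 * [Illustrative sketch, not part of the committed source: the effect
 * visible from userland.  fdopen() above returns ENODEV with p_dupfd
 * set, and vn_open()'s caller lands here, so the open becomes a dup:]
 */
#if 0	/* illustrative userland sketch */
	int fd = open("/dev/fd/0", O_RDONLY);	/* behaves like dup(0) */
#endif
/*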
*/ switch (error) { case ENODEV: /* * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) return (EACCES); fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; wfp->f_count++; if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; return (0); case ENXIO: /* * Steal away the file pointer from dfd, and stuff it into indx. */ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; fdp->fd_ofiles[dfd] = NULL; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; /* * Complete the clean up of the filedesc structure by * recomputing the various hints. */ if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; else while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; return (0); default: return (error); } /* NOTREACHED */ } /* * Get file structures. */ static int sysctl_kern_file SYSCTL_HANDLER_ARGS { int error; struct file *fp; if (!req->oldptr) { /* * overestimate by 10 files */ return (SYSCTL_OUT(req, 0, sizeof(filehead) + (nfiles + 10) * sizeof(struct file))); } error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead)); if (error) return (error); /* * followed by an array of file structures */ for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) { error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file)); if (error) return (error); } return (0); } SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_kern_file, "S,file", ""); SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, &maxfilesperproc, 0, ""); SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, ""); static fildesc_devsw_installed = 0; #ifdef DEVFS static void *devfs_token_stdin; static void *devfs_token_stdout; static void *devfs_token_stderr; static void *devfs_token_fildesc[NUMFDESC]; #endif static void fildesc_drvinit(void *unused) { dev_t dev; #ifdef DEVFS int fd; #endif if( ! fildesc_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&fildesc_cdevsw,NULL); fildesc_devsw_installed = 1; #ifdef DEVFS for (fd = 0; fd < NUMFDESC; fd++) devfs_token_fildesc[fd] = devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR, UID_BIN, GID_BIN, 0666, "fd/%d", fd); devfs_token_stdin = devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR, UID_ROOT, GID_WHEEL, 0666, "stdin"); devfs_token_stdout = devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR, UID_ROOT, GID_WHEEL, 0666, "stdout"); devfs_token_stderr = devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR, UID_ROOT, GID_WHEEL, 0666, "stderr"); #endif } } SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, fildesc_drvinit,NULL) Index: head/sys/kern/kern_exit.c =================================================================== --- head/sys/kern/kern_exit.c (revision 41085) +++ head/sys/kern/kern_exit.c (revision 41086) @@ -1,594 +1,600 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 - * $Id: kern_exit.c,v 1.67 1998/06/05 21:44:20 dg Exp $ + * $Id: kern_exit.c,v 1.68 1998/11/10 09:16:29 peter Exp $ */ #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for acct_process() function prototype */ #include #include #include #include #ifdef COMPAT_43 #include #include #endif #include /* for UCHAR_MAX = typeof(p_priority)_MAX */ #include #include #include #include #include #include static MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status"); static int wait1 __P((struct proc *, struct wait_args *, int)); /* * callout list for things to do at exit time */ typedef struct exit_list_element { struct exit_list_element *next; exitlist_fn function; } *ele_p; static ele_p exit_list; /* * exit -- * Death of process. */ void exit(p, uap) struct proc *p; struct rexit_args /* { int rval; } */ *uap; { exit1(p, W_EXITCODE(uap->rval, 0)); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state * to zombie, and unlink proc from allproc and parent's lists. Save exit * status and rusage for wait(). Check for child processes and orphan them. */ void exit1(p, rv) register struct proc *p; int rv; { register struct proc *q, *nq; register struct vmspace *vm; ele_p ep = exit_list; if (p->p_pid == 1) { printf("init died (signal %d, exit %d)\n", WTERMSIG(rv), WEXITSTATUS(rv)); panic("Going nowhere without my init!"); } aio_proc_rundown(p); /* are we a task leader? 
*/ if(p == p->p_leader) { struct kill_args killArgs; killArgs.signum = SIGKILL; q = p->p_peers; while(q) { killArgs.pid = q->p_pid; /* * The interface for kill is better * than the internal signal */ kill(p, &killArgs); nq = q; q = q->p_peers; /* * orphan the threads so we don't mess up * when they call exit */ nq->p_peers = 0; nq->p_leader = nq; } /* otherwise are we a peer? */ } else if(p->p_peers) { q = p->p_leader; while(q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; } #ifdef PGINPROF vmsizmon(); #endif STOPEVENT(p, S_EXIT, rv); /* * Check if any LKMs need anything done at process exit. * e.g. SYSV IPC stuff * XXX what if one of these generates an error? */ while (ep) { (*ep->function)(p); ep = ep->next; } if (p->p_flag & P_PROFIL) stopprofclock(p); MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage), M_ZOMBIE, M_WAITOK); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ p->p_flag &= ~(P_TRACED | P_PPWAIT); p->p_flag |= P_WEXIT; p->p_sigignore = ~0; p->p_siglist = 0; if (timevalisset(&p->p_realtimer.it_value)) untimeout(realitexpire, (caddr_t)p, p->p_ithandle); + + /* + * Reset any sigio structures pointing to us as a result of + * F_SETOWN with our pid. + */ + funsetownlst(&p->p_sigiolst); /* * Close open files and release open-file table. * This may block! */ fdfree(p); /* * XXX Shutdown SYSV semaphores */ semexit(p); /* The next two chunks should probably be moved to vmspace_exit. */ vm = p->p_vmspace; /* * Release user portion of address space. * This releases references to vnodes, * which could cause I/O if the file has been unlinked. * Need to do this early enough that we can still sleep. * Can't free the entire vmspace as the kernel stack * may be mapped within that space also. */ if (vm->vm_refcnt == 1) { if (vm->vm_shm) shmexit(p); pmap_remove_pages(&vm->vm_pmap, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); } if (SESS_LEADER(p)) { register struct session *sp = p->p_session; if (sp->s_ttyvp) { /* * Controlling process. * Signal foreground pgrp, * drain controlling terminal * and revoke access to controlling terminal. */ if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) { if (sp->s_ttyp->t_pgrp) pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1); (void) ttywait(sp->s_ttyp); /* * The tty could have been revoked * if we blocked. */ if (sp->s_ttyvp) VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } if (sp->s_ttyvp) vrele(sp->s_ttyvp); sp->s_ttyvp = NULL; /* * s_ttyp is not zero'd; we use this to indicate * that the session once had a controlling terminal. * (for logging and informational purposes) */ } sp->s_leader = NULL; } fixjobc(p, p->p_pgrp, 0); (void)acct_process(p); #ifdef KTRACE /* * release trace file */ p->p_traceflag = 0; /* don't trace the vrele() */ if (p->p_tracep) vrele(p->p_tracep); #endif /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); p->p_stat = SZOMB; LIST_REMOVE(p, p_hash); q = p->p_children.lh_first; if (q) /* only need this if any child is S_ZOMB */ wakeup((caddr_t) initproc); for (; q != 0; q = nq) { nq = q->p_sibling.le_next; LIST_REMOVE(q, p_sibling); LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling); q->p_pptr = initproc; /* * Traced processes are killed * since their existence means someone is screwing up. 
*/ if (q->p_flag & P_TRACED) { q->p_flag &= ~P_TRACED; psignal(q, SIGKILL); } } /* * Save exit status and final rusage info, adding in child rusage * info and self times. */ p->p_xstat = rv; *p->p_ru = p->p_stats->p_ru; calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL); ruadd(p->p_ru, &p->p_stats->p_cru); /* * Notify parent that we're gone. If parent has the P_NOCLDWAIT * flag set, notify process 1 instead (and hope it will handle * this situation). */ if (p->p_pptr->p_flag & P_NOCLDWAIT) { struct proc *pp = p->p_pptr; proc_reparent(p, initproc); /* * If this was the last child of our parent, notify * parent, so in case he was wait(2)ing, he will * continue. */ if (LIST_EMPTY(&pp->p_children)) wakeup((caddr_t)pp); } psignal(p->p_pptr, SIGCHLD); wakeup((caddr_t)p->p_pptr); #if defined(tahoe) /* move this to cpu_exit */ p->p_addr->u_pcb.pcb_savacc.faddr = (float *)NULL; #endif /* * Clear curproc after we've done all operations * that could block, and before tearing down the rest * of the process state that might be used from clock, etc. * Also, can't clear curproc while we're still runnable, * as we're not on a run queue (we are current, just not * a proper proc any longer!). * * Other substructures are freed from wait(). */ curproc = NULL; if (--p->p_limit->p_refcnt == 0) { FREE(p->p_limit, M_SUBPROC); p->p_limit = NULL; } /* * Finally, call machine-dependent code to release the remaining * resources including address space, the kernel stack and pcb. * The address space is released by "vmspace_free(p->p_vmspace)"; * This is machine-dependent, as we may have to change stacks * or ensure that the current one isn't reallocated before we * finish. cpu_exit will end with a call to cpu_switch(), finishing * our execution (pun intended). */ cpu_exit(p); } #ifdef COMPAT_43 #if defined(hp300) || defined(luna68k) #include #define GETPS(rp) ((struct frame *)(rp))->f_sr #else #define GETPS(rp) (rp)[PS] #endif int owait(p, uap) struct proc *p; register struct owait_args /* { int dummy; } */ *uap; { struct wait_args w; #ifdef PSL_ALLCC if ((GETPS(p->p_md.md_regs) & PSL_ALLCC) != PSL_ALLCC) { w.options = 0; w.rusage = NULL; } else { w.options = p->p_md.md_regs[R0]; w.rusage = (struct rusage *)p->p_md.md_regs[R1]; } #else w.options = 0; w.rusage = NULL; #endif w.pid = WAIT_ANY; w.status = NULL; return (wait1(p, &w, 1)); } #endif /* COMPAT_43 */ int wait4(p, uap) struct proc *p; struct wait_args *uap; { return (wait1(p, uap, 0)); } static int wait1(q, uap, compat) register struct proc *q; register struct wait_args /* { int pid; int *status; int options; struct rusage *rusage; } */ *uap; int compat; { register int nfound; register struct proc *p, *t; int status, error; if (uap->pid == 0) uap->pid = -q->p_pgid; if (uap->options &~ (WUNTRACED|WNOHANG)) return (EINVAL); loop: nfound = 0; for (p = q->p_children.lh_first; p != 0; p = p->p_sibling.le_next) { if (uap->pid != WAIT_ANY && p->p_pid != uap->pid && p->p_pgid != -uap->pid) continue; nfound++; if (p->p_stat == SZOMB) { /* charge childs scheduling cpu usage to parent */ if (curproc->p_pid != 1) { curproc->p_estcpu = min(curproc->p_estcpu + p->p_estcpu, UCHAR_MAX); } q->p_retval[0] = p->p_pid; #ifdef COMPAT_43 if (compat) q->p_retval[1] = p->p_xstat; else #endif if (uap->status) { status = p->p_xstat; /* convert to int */ if ((error = copyout((caddr_t)&status, (caddr_t)uap->status, sizeof(status)))) return (error); } if (uap->rusage && (error = copyout((caddr_t)p->p_ru, (caddr_t)uap->rusage, sizeof (struct rusage)))) return (error); /* * If we got the 
child via a ptrace 'attach', * we need to give it back to the old parent. */ if (p->p_oppid && (t = pfind(p->p_oppid))) { p->p_oppid = 0; proc_reparent(p, t); psignal(t, SIGCHLD); wakeup((caddr_t)t); return (0); } p->p_xstat = 0; ruadd(&q->p_stats->p_cru, p->p_ru); FREE(p->p_ru, M_ZOMBIE); p->p_ru = NULL; /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_cred->p_ruid, -1); /* * Release reference to text vnode */ if (p->p_textvp) vrele(p->p_textvp); /* * Free up credentials. */ if (--p->p_cred->p_refcnt == 0) { crfree(p->p_cred->pc_ucred); FREE(p->p_cred, M_SUBPROC); p->p_cred = NULL; } /* * Finally finished with old proc entry. * Unlink it from its process group and free it. */ leavepgrp(p); LIST_REMOVE(p, p_list); /* off zombproc */ LIST_REMOVE(p, p_sibling); /* * Give machine-dependent layer a chance * to free anything that cpu_exit couldn't * release while still running in process context. */ cpu_wait(p); zfree(proc_zone, p); nprocs--; return (0); } if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & WUNTRACED)) { p->p_flag |= P_WAITED; q->p_retval[0] = p->p_pid; #ifdef COMPAT_43 if (compat) { q->p_retval[1] = W_STOPCODE(p->p_xstat); error = 0; } else #endif if (uap->status) { status = W_STOPCODE(p->p_xstat); error = copyout((caddr_t)&status, (caddr_t)uap->status, sizeof(status)); } else error = 0; return (error); } } if (nfound == 0) return (ECHILD); if (uap->options & WNOHANG) { q->p_retval[0] = 0; return (0); } if ((error = tsleep((caddr_t)q, PWAIT | PCATCH, "wait", 0))) return (error); goto loop; } /* * make process 'parent' the new parent of process 'child'. */ void proc_reparent(child, parent) register struct proc *child; register struct proc *parent; { if (child->p_pptr == parent) return; LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); child->p_pptr = parent; } /* * The next two functions are to handle adding/deleting items on the * exit callout list * * at_exit(): * Take the arguments given and put them onto the exit callout list, * However first make sure that it's not already there. * returns 0 on success. */ int at_exit(function) exitlist_fn function; { ele_p ep; /* Be noisy if the programmer has lost track of things */ if (rm_at_exit(function)) printf("exit callout entry already present\n"); ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->next = exit_list; ep->function = function; exit_list = ep; return (0); } /* * Scan the exit callout list for the given items and remove them. * Returns the number of items removed. * Logically this can only be 0 or 1. */ int rm_at_exit(function) exitlist_fn function; { ele_p *epp, ep; int count; count = 0; epp = &exit_list; ep = *epp; while (ep) { if (ep->function == function) { *epp = ep->next; free(ep, M_TEMP); count++; } else { epp = &ep->next; } ep = *epp; } return (count); } Index: head/sys/kern/kern_proc.c =================================================================== --- head/sys/kern/kern_proc.c (revision 41085) +++ head/sys/kern/kern_proc.c (revision 41086) @@ -1,600 +1,608 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95 - * $Id: kern_proc.c,v 1.37 1998/07/11 07:45:40 bde Exp $ + * $Id: kern_proc.c,v 1.38 1998/11/09 15:07:41 truckman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include static MALLOC_DEFINE(M_PGRP, "pgrp", "process group header"); MALLOC_DEFINE(M_SESSION, "session", "session header"); static MALLOC_DEFINE(M_PROC, "proc", "Proc structures"); MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures"); struct prochd qs[NQS]; /* as good a place as any... */ struct prochd rtqs[NQS]; /* Space for REALTIME queues too */ struct prochd idqs[NQS]; /* Space for IDLE queues too */ static void pgdelete __P((struct pgrp *)); /* * Structure associated with user cacheing. */ struct uidinfo { LIST_ENTRY(uidinfo) ui_hash; uid_t ui_uid; long ui_proccnt; }; #define UIHASH(uid) (&uihashtbl[(uid) & uihash]) static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ static void orphanpg __P((struct pgrp *pg)); /* * Other process lists */ struct pidhashhead *pidhashtbl; u_long pidhash; struct pgrphashhead *pgrphashtbl; u_long pgrphash; struct proclist allproc; struct proclist zombproc; vm_zone_t proc_zone; /* * Initialize global process hashing structures. */ void procinit() { LIST_INIT(&allproc); LIST_INIT(&zombproc); pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash); pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash); uihashtbl = hashinit(maxproc / 16, M_PROC, &uihash); proc_zone = zinit("PROC", sizeof (struct proc), 0, 0, 5); } /* * Change the count associated with number of processes * a given user is using. 
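 *
 * [Illustrative sketch, not part of the committed source: a condensed,
 * hypothetical version of how fork1() consumes the return value to
 * enforce RLIMIT_NPROC for non-root users.]
 */
#if 0	/* illustrative sketch of the accounting pattern */
	uid = p1->p_cred->p_ruid;
	if (uid != 0 &&
	    chgproccnt(uid, 1) > p1->p_rlimit[RLIMIT_NPROC].rlim_cur) {
		chgproccnt(uid, -1);	/* back the increment out */
		return (EAGAIN);
	}
#endif
/*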
*/ int chgproccnt(uid, diff) uid_t uid; int diff; { register struct uidinfo *uip; register struct uihashhead *uipp; uipp = UIHASH(uid); for (uip = uipp->lh_first; uip != 0; uip = uip->ui_hash.le_next) if (uip->ui_uid == uid) break; if (uip) { uip->ui_proccnt += diff; if (uip->ui_proccnt > 0) return (uip->ui_proccnt); if (uip->ui_proccnt < 0) panic("chgproccnt: procs < 0"); LIST_REMOVE(uip, ui_hash); FREE(uip, M_PROC); return (0); } if (diff <= 0) { if (diff == 0) return(0); panic("chgproccnt: lost user"); } MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK); LIST_INSERT_HEAD(uipp, uip, ui_hash); uip->ui_uid = uid; uip->ui_proccnt = diff; return (diff); } /* * Is p an inferior of the current process? */ int inferior(p) register struct proc *p; { for (; p != curproc; p = p->p_pptr) if (p->p_pid == 0) return (0); return (1); } /* * Locate a process by number */ struct proc * pfind(pid) register pid_t pid; { register struct proc *p; for (p = PIDHASH(pid)->lh_first; p != 0; p = p->p_hash.le_next) if (p->p_pid == pid) return (p); return (NULL); } /* * Locate a process group by number */ struct pgrp * pgfind(pgid) register pid_t pgid; { register struct pgrp *pgrp; for (pgrp = PGRPHASH(pgid)->lh_first; pgrp != 0; pgrp = pgrp->pg_hash.le_next) if (pgrp->pg_id == pgid) return (pgrp); return (NULL); } /* * Move p to a new or existing process group (and session) */ int enterpgrp(p, pgid, mksess) register struct proc *p; pid_t pgid; int mksess; { register struct pgrp *pgrp = pgfind(pgid); #ifdef DIAGNOSTIC if (pgrp != NULL && mksess) /* firewalls */ panic("enterpgrp: setsid into non-empty pgrp"); if (SESS_LEADER(p)) panic("enterpgrp: session leader attempted setpgrp"); #endif if (pgrp == NULL) { pid_t savepid = p->p_pid; struct proc *np; /* * new process group */ #ifdef DIAGNOSTIC if (p->p_pid != pgid) panic("enterpgrp: new pgrp and pid != pgid"); #endif MALLOC(pgrp, struct pgrp *, sizeof(struct pgrp), M_PGRP, M_WAITOK); if ((np = pfind(savepid)) == NULL || np != p) return (ESRCH); if (mksess) { register struct session *sess; /* * new session */ MALLOC(sess, struct session *, sizeof(struct session), M_SESSION, M_WAITOK); sess->s_leader = p; sess->s_sid = p->p_pid; sess->s_count = 1; sess->s_ttyvp = NULL; sess->s_ttyp = NULL; bcopy(p->p_session->s_login, sess->s_login, sizeof(sess->s_login)); p->p_flag &= ~P_CONTROLT; pgrp->pg_session = sess; #ifdef DIAGNOSTIC if (p != curproc) panic("enterpgrp: mksession and p != curproc"); #endif } else { pgrp->pg_session = p->p_session; pgrp->pg_session->s_count++; } pgrp->pg_id = pgid; LIST_INIT(&pgrp->pg_members); LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash); pgrp->pg_jobc = 0; + SLIST_INIT(&pgrp->pg_sigiolst); } else if (pgrp == p->p_pgrp) return (0); /* * Adjust eligibility of affected pgrps to participate in job control. * Increment eligibility counts before decrementing, otherwise we * could reach 0 spuriously during the first call. */ fixjobc(p, pgrp, 1); fixjobc(p, p->p_pgrp, 0); LIST_REMOVE(p, p_pglist); if (p->p_pgrp->pg_members.lh_first == 0) pgdelete(p->p_pgrp); p->p_pgrp = pgrp; LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist); return (0); } /* * remove process from process group */ int leavepgrp(p) register struct proc *p; { LIST_REMOVE(p, p_pglist); if (p->p_pgrp->pg_members.lh_first == 0) pgdelete(p->p_pgrp); p->p_pgrp = 0; return (0); } /* * delete a process group */ static void pgdelete(pgrp) register struct pgrp *pgrp; { + + /* + * Reset any sigio structures pointing to us as a result of + * F_SETOWN with our pgid. 
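+ *
+ * [Illustrative sketch, not part of the committed source: the
+ * driver-side pattern the new sigio entry points support.  A driver
+ * keeps one struct sigio pointer (the softc field "sc_sigio" here is
+ * hypothetical) and delegates the FIO*OWN ioctls to this file:]
+ */
+#if 0	/* illustrative ioctl fragment */
+	case FIOSETOWN:
+		return (fsetown(*(int *)data, &sc->sc_sigio));
+	case FIOGETOWN:
+		*(int *)data = fgetown(sc->sc_sigio);
+		return (0);
+	/* ...and funsetown(sc->sc_sigio) on last close. */
+#endif
+/*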
+ */ + funsetownlst(&pgrp->pg_sigiolst); if (pgrp->pg_session->s_ttyp != NULL && pgrp->pg_session->s_ttyp->t_pgrp == pgrp) pgrp->pg_session->s_ttyp->t_pgrp = NULL; LIST_REMOVE(pgrp, pg_hash); if (--pgrp->pg_session->s_count == 0) FREE(pgrp->pg_session, M_SESSION); FREE(pgrp, M_PGRP); } /* * Adjust pgrp jobc counters when specified process changes process group. * We count the number of processes in each process group that "qualify" * the group for terminal job control (those with a parent in a different * process group of the same session). If that count reaches zero, the * process group becomes orphaned. Check both the specified process' * process group and that of its children. * entering == 0 => p is leaving specified group. * entering == 1 => p is entering specified group. */ void fixjobc(p, pgrp, entering) register struct proc *p; register struct pgrp *pgrp; int entering; { register struct pgrp *hispgrp; register struct session *mysession = pgrp->pg_session; /* * Check p's parent to see whether p qualifies its own process * group; if so, adjust count for p's process group. */ if ((hispgrp = p->p_pptr->p_pgrp) != pgrp && hispgrp->pg_session == mysession) if (entering) pgrp->pg_jobc++; else if (--pgrp->pg_jobc == 0) orphanpg(pgrp); /* * Check this process' children to see whether they qualify * their process groups; if so, adjust counts for children's * process groups. */ for (p = p->p_children.lh_first; p != 0; p = p->p_sibling.le_next) if ((hispgrp = p->p_pgrp) != pgrp && hispgrp->pg_session == mysession && p->p_stat != SZOMB) if (entering) hispgrp->pg_jobc++; else if (--hispgrp->pg_jobc == 0) orphanpg(hispgrp); } /* * A process group has become orphaned; * if there are any stopped processes in the group, * hang-up all process in that group. */ static void orphanpg(pg) struct pgrp *pg; { register struct proc *p; for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { if (p->p_stat == SSTOP) { for (p = pg->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { psignal(p, SIGHUP); psignal(p, SIGCONT); } return; } } } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(pgrpdump, pgrpdump) { register struct pgrp *pgrp; register struct proc *p; register int i; for (i = 0; i <= pgrphash; i++) { if (pgrp = pgrphashtbl[i].lh_first) { printf("\tindx %d\n", i); for (; pgrp != 0; pgrp = pgrp->pg_hash.le_next) { printf( "\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n", (void *)pgrp, (long)pgrp->pg_id, (void *)pgrp->pg_session, pgrp->pg_session->s_count, (void *)pgrp->pg_members.lh_first); for (p = pgrp->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { printf("\t\tpid %ld addr %p pgrp %p\n", (long)p->p_pid, (void *)p, (void *)p->p_pgrp); } } } } } #endif /* DDB */ /* * Fill in an eproc structure for the specified process. 
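 *
 * [Illustrative sketch, not part of the committed source: eproc is the
 * userland-visible half of kinfo_proc, so the fields filled in below
 * surface through the kern.proc sysctl.  Assumes the <sys/user.h>
 * kinfo_proc layout of this era.]
 */
#if 0	/* illustrative userland sketch */
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/user.h>

static pid_t
pgid_of(pid_t pid)
{
	struct kinfo_proc kp;
	size_t len;
	int mib[4];

	mib[0] = CTL_KERN;
	mib[1] = KERN_PROC;
	mib[2] = KERN_PROC_PID;
	mib[3] = (int)pid;
	len = sizeof(kp);
	if (sysctl(mib, 4, &kp, &len, NULL, 0) < 0)
		return (-1);
	return (kp.kp_eproc.e_pgid);
}
#endif
/*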
*/ void fill_eproc(p, ep) register struct proc *p; register struct eproc *ep; { register struct tty *tp; bzero(ep, sizeof(*ep)); ep->e_paddr = p; if (p->p_cred) { ep->e_pcred = *p->p_cred; if (p->p_ucred) ep->e_ucred = *p->p_ucred; } if (p->p_stat != SIDL && p->p_stat != SZOMB && p->p_vmspace != NULL) { register struct vmspace *vm = p->p_vmspace; #ifdef pmap_resident_count ep->e_vm.vm_rssize = pmap_resident_count(&vm->vm_pmap); /*XXX*/ #else ep->e_vm.vm_rssize = vm->vm_rssize; #endif ep->e_vm.vm_tsize = vm->vm_tsize; ep->e_vm.vm_dsize = vm->vm_dsize; ep->e_vm.vm_ssize = vm->vm_ssize; ep->e_vm.vm_taddr = vm->vm_taddr; ep->e_vm.vm_daddr = vm->vm_daddr; ep->e_vm.vm_minsaddr = vm->vm_minsaddr; ep->e_vm.vm_maxsaddr = vm->vm_maxsaddr; ep->e_vm.vm_map = vm->vm_map; #ifndef sparc ep->e_vm.vm_pmap = vm->vm_pmap; #endif } if (p->p_pptr) ep->e_ppid = p->p_pptr->p_pid; if (p->p_pgrp) { ep->e_pgid = p->p_pgrp->pg_id; ep->e_jobc = p->p_pgrp->pg_jobc; ep->e_sess = p->p_pgrp->pg_session; if (ep->e_sess) { bcopy(ep->e_sess->s_login, ep->e_login, sizeof(ep->e_login)); if (ep->e_sess->s_ttyvp) ep->e_flag = EPROC_CTTY; if (p->p_session && SESS_LEADER(p)) ep->e_flag |= EPROC_SLEADER; } } if ((p->p_flag & P_CONTROLT) && (ep->e_sess != NULL) && ((tp = ep->e_sess->s_ttyp) != NULL)) { ep->e_tdev = tp->t_dev; ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; ep->e_tsess = tp->t_session; } else ep->e_tdev = NODEV; if (p->p_wmesg) { strncpy(ep->e_wmesg, p->p_wmesg, WMESGLEN); ep->e_wmesg[WMESGLEN] = 0; } } static struct proc * zpfind(pid_t pid) { struct proc *p; for (p = zombproc.lh_first; p != 0; p = p->p_list.le_next) if (p->p_pid == pid) return (p); return (NULL); } static int sysctl_out_proc(struct proc *p, struct sysctl_req *req, int doingzomb) { struct eproc eproc; int error; pid_t pid = p->p_pid; fill_eproc(p, &eproc); error = SYSCTL_OUT(req,(caddr_t)p, sizeof(struct proc)); if (error) return (error); error = SYSCTL_OUT(req,(caddr_t)&eproc, sizeof(eproc)); if (error) return (error); if (!doingzomb && pid && (pfind(pid) != p)) return EAGAIN; if (doingzomb && zpfind(pid) != p) return EAGAIN; return (0); } static int sysctl_kern_proc SYSCTL_HANDLER_ARGS { int *name = (int*) arg1; u_int namelen = arg2; struct proc *p; int doingzomb; int error = 0; if (oidp->oid_number == KERN_PROC_PID) { if (namelen != 1) return (EINVAL); p = pfind((pid_t)name[0]); if (!p) return (0); error = sysctl_out_proc(p, req, 0); return (error); } if (oidp->oid_number == KERN_PROC_ALL && !namelen) ; else if (oidp->oid_number != KERN_PROC_ALL && namelen == 1) ; else return (EINVAL); if (!req->oldptr) { /* overestimate by 5 procs */ error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5); if (error) return (error); } for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) { if (!doingzomb) p = allproc.lh_first; else p = zombproc.lh_first; for (; p != 0; p = p->p_list.le_next) { /* * Skip embryonic processes. */ if (p->p_stat == SIDL) continue; /* * TODO - make more efficient (see notes below). * do by session. 
*/ switch (oidp->oid_number) { case KERN_PROC_PGRP: /* could do this by traversing pgrp */ if (p->p_pgrp == NULL || p->p_pgrp->pg_id != (pid_t)name[0]) continue; break; case KERN_PROC_TTY: if ((p->p_flag & P_CONTROLT) == 0 || p->p_session == NULL || p->p_session->s_ttyp == NULL || p->p_session->s_ttyp->t_dev != (dev_t)name[0]) continue; break; case KERN_PROC_UID: if (p->p_ucred == NULL || p->p_ucred->cr_uid != (uid_t)name[0]) continue; break; case KERN_PROC_RUID: if (p->p_ucred == NULL || p->p_cred->p_ruid != (uid_t)name[0]) continue; break; } error = sysctl_out_proc(p, req, doingzomb); if (error) return (error); } } return (0); } SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD, 0, "Process table"); SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT, 0, 0, sysctl_kern_proc, "S,proc", ""); SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD, sysctl_kern_proc, "Process table"); Index: head/sys/kern/kern_sig.c =================================================================== --- head/sys/kern/kern_sig.c (revision 41085) +++ head/sys/kern/kern_sig.c (revision 41086) @@ -1,1346 +1,1383 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 - * $Id: kern_sig.c,v 1.47 1998/09/14 23:25:18 jdp Exp $ + * $Id: kern_sig.c,v 1.48 1998/10/21 16:31:38 jdp Exp $ */ #include "opt_compat.h" #include "opt_ktrace.h" #define SIGPROP /* include signal properties table */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif static int killpg1 __P((struct proc *cp, int signum, int pgid, int all)); static void setsigvec __P((struct proc *p, int signum, struct sigaction *sa)); static void stop __P((struct proc *)); static int kern_logsigexit = 1; SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, &kern_logsigexit, 0, ""); /* * Can process p, with pcred pc, send the signal signum to process q? */ #define CANSIGNAL(p, pc, q, signum) \ ((pc)->pc_ucred->cr_uid == 0 || \ (pc)->p_ruid == (q)->p_cred->p_ruid || \ (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \ (pc)->p_ruid == (q)->p_ucred->cr_uid || \ (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid || \ ((signum) == SIGCONT && (q)->p_session == (p)->p_session)) +/* + * Policy -- Can real uid ruid with ucred uc send a signal to process q? + */ +#define CANSIGIO(ruid, uc, q) \ + ((uc)->cr_uid == 0 || \ + (ruid) == (q)->p_cred->p_ruid || \ + (uc)->cr_uid == (q)->p_cred->p_ruid || \ + (ruid) == (q)->p_ucred->cr_uid || \ + (uc)->cr_uid == (q)->p_ucred->cr_uid) + int sugid_coredump; SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RW, &sugid_coredump, 0, ""); #ifndef _SYS_SYSPROTO_H_ struct sigaction_args { int signum; struct sigaction *nsa; struct sigaction *osa; }; #endif /* ARGSUSED */ int sigaction(p, uap) struct proc *p; register struct sigaction_args *uap; { struct sigaction vec; register struct sigaction *sa; register struct sigacts *ps = p->p_sigacts; register int signum; int bit, error; signum = uap->signum; if (signum <= 0 || signum >= NSIG) return (EINVAL); sa = &vec; if (uap->osa) { sa->sa_handler = ps->ps_sigact[signum]; sa->sa_mask = ps->ps_catchmask[signum]; bit = sigmask(signum); sa->sa_flags = 0; if ((ps->ps_sigonstack & bit) != 0) sa->sa_flags |= SA_ONSTACK; if ((ps->ps_sigintr & bit) == 0) sa->sa_flags |= SA_RESTART; if ((ps->ps_sigreset & bit) != 0) sa->sa_flags |= SA_RESETHAND; if ((ps->ps_signodefer & bit) != 0) sa->sa_flags |= SA_NODEFER; if (signum == SIGCHLD && p->p_flag & P_NOCLDSTOP) sa->sa_flags |= SA_NOCLDSTOP; if (signum == SIGCHLD && p->p_flag & P_NOCLDWAIT) sa->sa_flags |= SA_NOCLDWAIT; if ((error = copyout((caddr_t)sa, (caddr_t)uap->osa, sizeof (vec)))) return (error); } if (uap->nsa) { if ((error = copyin((caddr_t)uap->nsa, (caddr_t)sa, sizeof (vec)))) return (error); if ((signum == SIGKILL || signum == SIGSTOP) && sa->sa_handler != SIG_DFL) return (EINVAL); setsigvec(p, signum, sa); } return (0); } static void setsigvec(p, signum, sa) register struct proc *p; int signum; register struct sigaction *sa; { register struct sigacts *ps = p->p_sigacts; register int 
bit; bit = sigmask(signum); /* * Change setting atomically. */ (void) splhigh(); ps->ps_sigact[signum] = sa->sa_handler; ps->ps_catchmask[signum] = sa->sa_mask &~ sigcantmask; if ((sa->sa_flags & SA_RESTART) == 0) ps->ps_sigintr |= bit; else ps->ps_sigintr &= ~bit; if (sa->sa_flags & SA_ONSTACK) ps->ps_sigonstack |= bit; else ps->ps_sigonstack &= ~bit; if (sa->sa_flags & SA_RESETHAND) ps->ps_sigreset |= bit; else ps->ps_sigreset &= ~bit; if (sa->sa_flags & SA_NODEFER) ps->ps_signodefer |= bit; else ps->ps_signodefer &= ~bit; #ifdef COMPAT_SUNOS if (sa->sa_flags & SA_USERTRAMP) ps->ps_usertramp |= bit; else ps->ps_usertramp &= ~bit; #endif if (signum == SIGCHLD) { if (sa->sa_flags & SA_NOCLDSTOP) p->p_flag |= P_NOCLDSTOP; else p->p_flag &= ~P_NOCLDSTOP; if (sa->sa_flags & SA_NOCLDWAIT) { /* * Paranoia: since SA_NOCLDWAIT is implemented by * reparenting the dying child to PID 1 (and * trust it to reap the zombie), PID 1 itself is * forbidden to set SA_NOCLDWAIT. */ if (p->p_pid == 1) p->p_flag &= ~P_NOCLDWAIT; else p->p_flag |= P_NOCLDWAIT; } else p->p_flag &= ~P_NOCLDWAIT; } /* * Set bit in p_sigignore for signals that are set to SIG_IGN, * and for signals set to SIG_DFL where the default is to ignore. * However, don't put SIGCONT in p_sigignore, * as we have to restart the process. */ if (sa->sa_handler == SIG_IGN || (sigprop[signum] & SA_IGNORE && sa->sa_handler == SIG_DFL)) { p->p_siglist &= ~bit; /* never to be seen again */ if (signum != SIGCONT) p->p_sigignore |= bit; /* easier in psignal */ p->p_sigcatch &= ~bit; } else { p->p_sigignore &= ~bit; if (sa->sa_handler == SIG_DFL) p->p_sigcatch &= ~bit; else p->p_sigcatch |= bit; } (void) spl0(); } /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. */ void siginit(p) struct proc *p; { register int i; for (i = 0; i < NSIG; i++) if (sigprop[i] & SA_IGNORE && i != SIGCONT) p->p_sigignore |= sigmask(i); } /* * Reset signals for an exec of the specified process. */ void execsigs(p) register struct proc *p; { register struct sigacts *ps = p->p_sigacts; register int nc, mask; /* * Reset caught signals. Held signals remain held * through p_sigmask (unless they were caught, * and are now ignored by default). */ while (p->p_sigcatch) { nc = ffs((long)p->p_sigcatch); mask = sigmask(nc); p->p_sigcatch &= ~mask; if (sigprop[nc] & SA_IGNORE) { if (nc != SIGCONT) p->p_sigignore |= mask; p->p_siglist &= ~mask; } ps->ps_sigact[nc] = SIG_DFL; } /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. */ ps->ps_sigstk.ss_flags = SS_DISABLE; ps->ps_sigstk.ss_size = 0; ps->ps_sigstk.ss_sp = 0; ps->ps_flags = 0; } /* * Manipulate signal mask. * Note that we receive new mask, not pointer, * and return old mask as return value; * the library stub does the rest. 
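 *
 * A hedged sketch of such a stub (the actual libc code may differ;
 * NULL-set handling omitted for illustration):
 *
 *	old = syscall(SYS_sigprocmask, how, *set);
 *	if (oset != NULL)
 *		*oset = old;	previous mask comes back in p_retval[0]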
*/ #ifndef _SYS_SYSPROTO_H_ struct sigprocmask_args { int how; sigset_t mask; }; #endif int sigprocmask(p, uap) register struct proc *p; struct sigprocmask_args *uap; { int error = 0; p->p_retval[0] = p->p_sigmask; (void) splhigh(); switch (uap->how) { case SIG_BLOCK: p->p_sigmask |= uap->mask &~ sigcantmask; break; case SIG_UNBLOCK: p->p_sigmask &= ~uap->mask; break; case SIG_SETMASK: p->p_sigmask = uap->mask &~ sigcantmask; break; default: error = EINVAL; break; } (void) spl0(); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigpending_args { int dummy; }; #endif /* ARGSUSED */ int sigpending(p, uap) struct proc *p; struct sigpending_args *uap; { p->p_retval[0] = p->p_siglist; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Generalized interface signal handler, 4.3-compatible. */ #ifndef _SYS_SYSPROTO_H_ struct osigvec_args { int signum; struct sigvec *nsv; struct sigvec *osv; }; #endif /* ARGSUSED */ int osigvec(p, uap) struct proc *p; register struct osigvec_args *uap; { struct sigvec vec; register struct sigacts *ps = p->p_sigacts; register struct sigvec *sv; register int signum; int bit, error; signum = uap->signum; if (signum <= 0 || signum >= NSIG) return (EINVAL); sv = &vec; if (uap->osv) { *(sig_t *)&sv->sv_handler = ps->ps_sigact[signum]; sv->sv_mask = ps->ps_catchmask[signum]; bit = sigmask(signum); sv->sv_flags = 0; if ((ps->ps_sigonstack & bit) != 0) sv->sv_flags |= SV_ONSTACK; if ((ps->ps_sigintr & bit) != 0) sv->sv_flags |= SV_INTERRUPT; if ((ps->ps_sigreset & bit) != 0) sv->sv_flags |= SV_RESETHAND; if ((ps->ps_signodefer & bit) != 0) sv->sv_flags |= SV_NODEFER; #ifndef COMPAT_SUNOS if (signum == SIGCHLD && p->p_flag & P_NOCLDSTOP) sv->sv_flags |= SV_NOCLDSTOP; #endif if ((error = copyout((caddr_t)sv, (caddr_t)uap->osv, sizeof (vec)))) return (error); } if (uap->nsv) { if ((error = copyin((caddr_t)uap->nsv, (caddr_t)sv, sizeof (vec)))) return (error); if ((signum == SIGKILL || signum == SIGSTOP) && sv->sv_handler != SIG_DFL) return (EINVAL); #ifdef COMPAT_SUNOS sv->sv_flags |= SA_USERTRAMP; #endif sv->sv_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ setsigvec(p, signum, (struct sigaction *)sv); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigblock_args { int mask; }; #endif int osigblock(p, uap) register struct proc *p; struct osigblock_args *uap; { (void) splhigh(); p->p_retval[0] = p->p_sigmask; p->p_sigmask |= uap->mask &~ sigcantmask; (void) spl0(); return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigsetmask_args { int mask; }; #endif int osigsetmask(p, uap) struct proc *p; struct osigsetmask_args *uap; { (void) splhigh(); p->p_retval[0] = p->p_sigmask; p->p_sigmask = uap->mask &~ sigcantmask; (void) spl0(); return (0); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Suspend process until signal, providing mask to be set * in the meantime. Note nonstandard calling convention: * libc stub passes mask, not pointer, to save a copyin. */ #ifndef _SYS_SYSPROTO_H_ struct sigsuspend_args { sigset_t mask; }; #endif /* ARGSUSED */ int sigsuspend(p, uap) register struct proc *p; struct sigsuspend_args *uap; { register struct sigacts *ps = p->p_sigacts; /* * When returning from sigpause, we want * the old mask to be restored after the * signal handler has finished. Thus, we * save it here and mark the sigacts structure * to indicate this. 
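 *
 * postsig() is the consumer of SAS_OLDMASK: when delivering a caught
 * signal it restores ps_oldmask (the pre-sigsuspend mask) rather than
 * the temporary mask installed below.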
*/ ps->ps_oldmask = p->p_sigmask; ps->ps_flags |= SAS_OLDMASK; p->p_sigmask = uap->mask &~ sigcantmask; while (tsleep((caddr_t) ps, PPAUSE|PCATCH, "pause", 0) == 0) /* void */; /* always return EINTR rather than ERESTART... */ return (EINTR); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct osigstack_args { struct sigstack *nss; struct sigstack *oss; }; #endif /* ARGSUSED */ int osigstack(p, uap) struct proc *p; register struct osigstack_args *uap; { struct sigstack ss; struct sigacts *psp; int error = 0; psp = p->p_sigacts; ss.ss_sp = psp->ps_sigstk.ss_sp; ss.ss_onstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; if (uap->oss && (error = copyout((caddr_t)&ss, (caddr_t)uap->oss, sizeof (struct sigstack)))) return (error); if (uap->nss && (error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss))) == 0) { psp->ps_sigstk.ss_sp = ss.ss_sp; psp->ps_sigstk.ss_size = 0; psp->ps_sigstk.ss_flags |= ss.ss_onstack & SS_ONSTACK; psp->ps_flags |= SAS_ALTSTACK; } return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ #ifndef _SYS_SYSPROTO_H_ struct sigaltstack_args { struct sigaltstack *nss; struct sigaltstack *oss; }; #endif /* ARGSUSED */ int sigaltstack(p, uap) struct proc *p; register struct sigaltstack_args *uap; { struct sigacts *psp; struct sigaltstack ss; int error; psp = p->p_sigacts; if ((psp->ps_flags & SAS_ALTSTACK) == 0) psp->ps_sigstk.ss_flags |= SS_DISABLE; if (uap->oss && (error = copyout((caddr_t)&psp->ps_sigstk, (caddr_t)uap->oss, sizeof (struct sigaltstack)))) return (error); if (uap->nss == 0) return (0); if ((error = copyin((caddr_t)uap->nss, (caddr_t)&ss, sizeof (ss)))) return (error); if (ss.ss_flags & SS_DISABLE) { if (psp->ps_sigstk.ss_flags & SS_ONSTACK) return (EINVAL); psp->ps_flags &= ~SAS_ALTSTACK; psp->ps_sigstk.ss_flags = ss.ss_flags; return (0); } if (ss.ss_size < MINSIGSTKSZ) return (ENOMEM); psp->ps_flags |= SAS_ALTSTACK; psp->ps_sigstk= ss; return (0); } /* * Common code for kill process group/broadcast kill. * cp is calling process. */ int killpg1(cp, signum, pgid, all) register struct proc *cp; int signum, pgid, all; { register struct proc *p; register struct pcred *pc = cp->p_cred; struct pgrp *pgrp; int nfound = 0; if (all) /* * broadcast */ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p == cp || !CANSIGNAL(cp, pc, p, signum)) continue; nfound++; if (signum) psignal(p, signum); } else { if (pgid == 0) /* * zero pgid means send to my process group. */ pgrp = cp->p_pgrp; else { pgrp = pgfind(pgid); if (pgrp == NULL) return (ESRCH); } for (p = pgrp->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) { if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p->p_stat == SZOMB || !CANSIGNAL(cp, pc, p, signum)) continue; nfound++; if (signum) psignal(p, signum); } } return (nfound ? 
0 : ESRCH); } #ifndef _SYS_SYSPROTO_H_ struct kill_args { int pid; int signum; }; #endif /* ARGSUSED */ int kill(cp, uap) register struct proc *cp; register struct kill_args *uap; { register struct proc *p; register struct pcred *pc = cp->p_cred; if ((u_int)uap->signum >= NSIG) return (EINVAL); if (uap->pid > 0) { /* kill single process */ if ((p = pfind(uap->pid)) == NULL) return (ESRCH); if (!CANSIGNAL(cp, pc, p, uap->signum)) return (EPERM); if (uap->signum) psignal(p, uap->signum); return (0); } switch (uap->pid) { case -1: /* broadcast signal */ return (killpg1(cp, uap->signum, 0, 1)); case 0: /* signal own process group */ return (killpg1(cp, uap->signum, 0, 0)); default: /* negative explicit process group */ return (killpg1(cp, uap->signum, -uap->pid, 0)); } /* NOTREACHED */ } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct okillpg_args { int pgid; int signum; }; #endif /* ARGSUSED */ int okillpg(p, uap) struct proc *p; register struct okillpg_args *uap; { if ((u_int)uap->signum >= NSIG) return (EINVAL); return (killpg1(p, uap->signum, uap->pgid, 0)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Send a signal to a process group. */ void gsignal(pgid, signum) int pgid, signum; { struct pgrp *pgrp; if (pgid && (pgrp = pgfind(pgid))) pgsignal(pgrp, signum, 0); } /* * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. */ void pgsignal(pgrp, signum, checkctty) struct pgrp *pgrp; int signum, checkctty; { register struct proc *p; if (pgrp) for (p = pgrp->pg_members.lh_first; p != 0; p = p->p_pglist.le_next) if (checkctty == 0 || p->p_flag & P_CONTROLT) psignal(p, signum); } /* * Send a signal caused by a trap to the current process. * If it will be caught immediately, deliver it with correct code. * Otherwise, post it normally. */ void trapsignal(p, signum, code) struct proc *p; register int signum; u_long code; { register struct sigacts *ps = p->p_sigacts; int mask; mask = sigmask(signum); if ((p->p_flag & P_TRACED) == 0 && (p->p_sigcatch & mask) != 0 && (p->p_sigmask & mask) == 0) { p->p_stats->p_ru.ru_nsignals++; #ifdef KTRACE if (KTRPOINT(p, KTR_PSIG)) ktrpsig(p->p_tracep, signum, ps->ps_sigact[signum], p->p_sigmask, code); #endif (*p->p_sysent->sv_sendsig)(ps->ps_sigact[signum], signum, p->p_sigmask, code); p->p_sigmask |= ps->ps_catchmask[signum] | (mask & ~ps->ps_signodefer); if ((ps->ps_sigreset & mask) != 0) { /* * See setsigvec() for origin of this code. */ p->p_sigcatch &= ~mask; if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) p->p_sigignore |= mask; ps->ps_sigact[signum] = SIG_DFL; } } else { ps->ps_code = code; /* XXX for core dump/debugger */ ps->ps_sig = signum; /* XXX to verify code */ psignal(p, signum); } } /* * Send the signal to the process. If the signal has an action, the action * is usually performed by the target process rather than the caller; we add * the signal to the set of pending signals for the process. * * Exceptions: * o When a stop signal is sent to a sleeping process that takes the * default action, the process is stopped without awakening it. * o SIGCONT restarts stopped processes (or puts them back to sleep) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. 
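 *
 * Below, the chosen action is one of SIG_DFL, SIG_CATCH or SIG_HOLD
 * (blocked; leave pending), or the signal is discarded outright if
 * ignored; a traced process always takes the SIG_DFL path so that
 * its parent gets to see every signal.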
*/ void psignal(p, signum) register struct proc *p; register int signum; { register int s, prop; register sig_t action; int mask; if ((u_int)signum >= NSIG || signum == 0) { printf("psignal: signum %d\n", signum); panic("psignal signal number"); } mask = sigmask(signum); prop = sigprop[signum]; /* * If proc is traced, always give parent a chance; * if signal event is tracked by procfs, give *that* * a chance, as well. */ if ((p->p_flag & P_TRACED) || (p->p_stops & S_SIG)) action = SIG_DFL; else { /* * If the signal is being ignored, * then we forget about it immediately. * (Note: we don't set SIGCONT in p_sigignore, * and if it is set to SIG_IGN, * action will be SIG_DFL here.) */ if (p->p_sigignore & mask) return; if (p->p_sigmask & mask) action = SIG_HOLD; else if (p->p_sigcatch & mask) action = SIG_CATCH; else action = SIG_DFL; } if (p->p_nice > NZERO && action == SIG_DFL && (prop & SA_KILL) && (p->p_flag & P_TRACED) == 0) p->p_nice = NZERO; if (prop & SA_CONT) p->p_siglist &= ~stopsigmask; if (prop & SA_STOP) { /* * If sending a tty stop signal to a member of an orphaned * process group, discard the signal here if the action * is default; don't stop the process below if sleeping, * and don't clear any pending SIGCONT. */ if (prop & SA_TTYSTOP && p->p_pgrp->pg_jobc == 0 && action == SIG_DFL) return; p->p_siglist &= ~contsigmask; } p->p_siglist |= mask; /* * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ if (action == SIG_HOLD && ((prop & SA_CONT) == 0 || p->p_stat != SSTOP)) return; s = splhigh(); switch (p->p_stat) { case SSLEEP: /* * If process is sleeping uninterruptibly * we can't interrupt the sleep... the signal will * be noticed when the process returns through * trap() or syscall(). */ if ((p->p_flag & P_SINTR) == 0) goto out; /* * Process is sleeping and traced... make it runnable * so it can discover the signal in issignal() and stop * for the parent. */ if (p->p_flag & P_TRACED) goto run; /* * If SIGCONT is default (or ignored) and process is * asleep, we are finished; the process should not * be awakened. */ if ((prop & SA_CONT) && action == SIG_DFL) { p->p_siglist &= ~mask; goto out; } /* * When a sleeping process receives a stop * signal, process immediately if possible. * All other (caught or default) signals * cause the process to run. */ if (prop & SA_STOP) { if (action != SIG_DFL) goto runfast; /* * If a child holding parent blocked, * stopping could cause deadlock. */ if (p->p_flag & P_PPWAIT) goto out; p->p_siglist &= ~mask; p->p_xstat = signum; if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) psignal(p->p_pptr, SIGCHLD); stop(p); goto out; } else goto runfast; /*NOTREACHED*/ case SSTOP: /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * Kill signal always sets processes running. */ if (signum == SIGKILL) goto runfast; if (prop & SA_CONT) { /* * If SIGCONT is default (or ignored), we continue the * process but don't leave the signal in p_siglist, as * it has no further action. If SIGCONT is held, we * continue the process and leave the signal in * p_siglist. If the process catches SIGCONT, let it * handle the signal itself. If it isn't waiting on * an event, then it goes back to run state. * Otherwise, process goes back to sleep state. 
*/ if (action == SIG_DFL) p->p_siglist &= ~mask; if (action == SIG_CATCH) goto runfast; if (p->p_wchan == 0) goto run; p->p_stat = SSLEEP; goto out; } if (prop & SA_STOP) { /* * Already stopped, don't need to stop again. * (If we did the shell could get confused.) */ p->p_siglist &= ~mask; /* take it away */ goto out; } /* * If process is sleeping interruptibly, then simulate a * wakeup so that when it is continued, it will be made * runnable and can look at the signal. But don't make * the process runnable, leave it stopped. */ if (p->p_wchan && p->p_flag & P_SINTR) unsleep(p); goto out; default: /* * SRUN, SIDL, SZOMB do nothing with the signal, * other than kicking ourselves if we are running. * It will either never be noticed, or noticed very soon. */ if (p == curproc) signotify(p); #ifdef SMP else if (p->p_stat == SRUN) forward_signal(p); #endif goto out; } /*NOTREACHED*/ runfast: /* * Raise priority to at least PUSER. */ if (p->p_priority > PUSER) p->p_priority = PUSER; run: setrunnable(p); out: splx(s); } /* * If the current process has received a signal (should be caught or cause * termination, should interrupt current syscall), return the signal number. * Stop signals with default action are processed immediately, then cleared; * they aren't returned. This is checked after each entry to the system for * a syscall or trap (though this can usually be done without calling issignal * by checking the pending signal masks in the CURSIG macro.) The normal call * sequence is * * while (signum = CURSIG(curproc)) * postsig(signum); */ int issignal(p) register struct proc *p; { register int signum, mask, prop; for (;;) { int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG); mask = p->p_siglist & ~p->p_sigmask; if (p->p_flag & P_PPWAIT) mask &= ~stopsigmask; if (mask == 0) /* no signal to send */ return (0); signum = ffs((long)mask); mask = sigmask(signum); prop = sigprop[signum]; STOPEVENT(p, S_SIG, signum); /* * We should see pending but ignored signals * only if P_TRACED was on when they were posted. */ if ((mask & p->p_sigignore) && (traced == 0)) { p->p_siglist &= ~mask; continue; } if (p->p_flag & P_TRACED && (p->p_flag & P_PPWAIT) == 0) { /* * If traced, always stop, and stay * stopped until released by the parent. */ p->p_xstat = signum; psignal(p->p_pptr, SIGCHLD); do { stop(p); mi_switch(); } while (!trace_req(p) && p->p_flag & P_TRACED); /* * If the traced bit got turned off, go back up * to the top to rescan signals. This ensures * that p_sig* and ps_sigact are consistent. */ if ((p->p_flag & P_TRACED) == 0) continue; /* * If parent wants us to take the signal, * then it will leave it in p->p_xstat; * otherwise we just look for signals again. */ p->p_siglist &= ~mask; /* clear the old signal */ signum = p->p_xstat; if (signum == 0) continue; /* * Put the new signal into p_siglist. If the * signal is being masked, look for other signals. */ mask = sigmask(signum); p->p_siglist |= mask; if (p->p_sigmask & mask) continue; } /* * Decide whether the signal should be returned. * Return the signal's number, or fall through * to clear it from the pending mask. */ switch ((int)(intptr_t)p->p_sigacts->ps_sigact[signum]) { case (int)SIG_DFL: /* * Don't take default actions on system processes. */ if (p->p_pid <= 1) { #ifdef DIAGNOSTIC /* * Are you sure you want to ignore SIGSEGV * in init? 
XXX */ printf("Process (pid %lu) got signal %d\n", (u_long)p->p_pid, signum); #endif break; /* == ignore */ } /* * If there is a pending stop signal to process * with default action, stop here, * then clear the signal. However, * if process is member of an orphaned * process group, ignore tty stop signals. */ if (prop & SA_STOP) { if (p->p_flag & P_TRACED || (p->p_pgrp->pg_jobc == 0 && prop & SA_TTYSTOP)) break; /* == ignore */ p->p_xstat = signum; stop(p); if ((p->p_pptr->p_flag & P_NOCLDSTOP) == 0) psignal(p->p_pptr, SIGCHLD); mi_switch(); break; } else if (prop & SA_IGNORE) { /* * Except for SIGCONT, shouldn't get here. * Default action is to ignore; drop it. */ break; /* == ignore */ } else return (signum); /*NOTREACHED*/ case (int)SIG_IGN: /* * Masking above should prevent us ever trying * to take action on an ignored signal other * than SIGCONT, unless process is traced. */ if ((prop & SA_CONT) == 0 && (p->p_flag & P_TRACED) == 0) printf("issignal\n"); break; /* == ignore */ default: /* * This signal has an action, let * postsig() process it. */ return (signum); } p->p_siglist &= ~mask; /* take the signal! */ } /* NOTREACHED */ } /* * Put the argument process into the stopped state and notify the parent * via wakeup. Signals are handled elsewhere. The process must not be * on the run queue. */ void stop(p) register struct proc *p; { p->p_stat = SSTOP; p->p_flag &= ~P_WAITED; wakeup((caddr_t)p->p_pptr); } /* * Take the action for the specified signal * from the current set of pending signals. */ void postsig(signum) register int signum; { register struct proc *p = curproc; register struct sigacts *ps = p->p_sigacts; register sig_t action; int code, mask, returnmask; #ifdef DIAGNOSTIC if (signum == 0) panic("postsig"); #endif mask = sigmask(signum); p->p_siglist &= ~mask; action = ps->ps_sigact[signum]; #ifdef KTRACE if (KTRPOINT(p, KTR_PSIG)) ktrpsig(p->p_tracep, signum, action, ps->ps_flags & SAS_OLDMASK ? ps->ps_oldmask : p->p_sigmask, 0); #endif STOPEVENT(p, S_SIG, signum); if (action == SIG_DFL) { /* * Default action, where the default is to kill * the process. (Other cases were ignored above.) */ sigexit(p, signum); /* NOTREACHED */ } else { /* * If we get here, the signal must be caught. */ #ifdef DIAGNOSTIC if (action == SIG_IGN || (p->p_sigmask & mask)) panic("postsig action"); #endif /* * Set the new mask value and also defer further * occurrences of this signal. * * Special case: user has done a sigpause. Here the * current mask is not of interest, but rather the * mask from before the sigpause is what we want * restored after the signal processing is completed. */ (void) splhigh(); if (ps->ps_flags & SAS_OLDMASK) { returnmask = ps->ps_oldmask; ps->ps_flags &= ~SAS_OLDMASK; } else returnmask = p->p_sigmask; p->p_sigmask |= ps->ps_catchmask[signum] | (mask & ~ps->ps_signodefer); if ((ps->ps_sigreset & mask) != 0) { /* * See setsigvec() for origin of this code. */ p->p_sigcatch &= ~mask; if (signum != SIGCONT && sigprop[signum] & SA_IGNORE) p->p_sigignore |= mask; ps->ps_sigact[signum] = SIG_DFL; } (void) spl0(); p->p_stats->p_ru.ru_nsignals++; if (ps->ps_sig != signum) { code = 0; } else { code = ps->ps_code; ps->ps_code = 0; ps->ps_sig = 0; } (*p->p_sysent->sv_sendsig)(action, signum, returnmask, code); } } /* * Kill the current process for stated reason. */ void killproc(p, why) struct proc *p; char *why; { log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_cred && p->p_ucred ?
p->p_ucred->cr_uid : -1, why); psignal(p, SIGKILL); } /* * Force the current process to exit with the specified signal, dumping core * if appropriate. We bypass the normal tests for masked and caught signals, * allowing unrecoverable failures to terminate the process without changing * signal state. Mark the accounting record with the signal termination. * If dumping core, save the signal number for the debugger. Calls exit and * does not return. */ void sigexit(p, signum) register struct proc *p; int signum; { p->p_acflag |= AXSIG; if (sigprop[signum] & SA_CORE) { p->p_sigacts->ps_sig = signum; /* * Log signals which would cause core dumps * (Log as LOG_INFO to appease those who don't want * these messages.) * XXX : Todo, as well as euid, write out ruid too */ if (p->p_sysent->sv_coredump != NULL && (*p->p_sysent->sv_coredump)(p) == 0) signum |= WCOREFLAG; if (kern_logsigexit) log(LOG_INFO, "pid %d (%s), uid %d: exited on signal %d%s\n", p->p_pid, p->p_comm, p->p_cred && p->p_ucred ? p->p_ucred->cr_uid : -1, signum &~ WCOREFLAG, signum & WCOREFLAG ? " (core dumped)" : ""); } exit1(p, W_EXITCODE(0, signum)); /* NOTREACHED */ } static char corefilename[MAXPATHLEN+1] = {"%N.core"}; SYSCTL_STRING(_kern, OID_AUTO, corefile, CTLFLAG_RW, corefilename, sizeof(corefilename), "process corefile name format string"); /* * expand_name(name, uid, pid) * Expand the name described in corefilename, using name, uid, and pid. * corefilename is a printf-like string, with three format specifiers: * %N name of process ("name") * %P process id (pid) * %U user id (uid) * For example, "%N.core" is the default; they can be disabled completely * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". * This is controlled by the sysctl variable kern.corefile (see above). */ char * expand_name(name, uid, pid) const char *name; int uid; int pid; { char *temp; char buf[11]; /* Buffer for pid/uid -- max 4B */ int i, n; char *format = corefilename; temp = malloc(MAXPATHLEN + 3, M_TEMP, M_NOWAIT); bzero(temp, MAXPATHLEN+3); for (i = 0, n = 0; i < MAXPATHLEN && format[i]; i++) { int l; switch (format[i]) { case '%': /* Format character */ i++; switch (format[i]) { case '%': temp[n++] = '%'; break; case 'N': /* process name */ l = strlen(name); if ((n + l) > MAXPATHLEN) { log(LOG_ERR, "pid %d (%s), uid (%d): Path `%s%s' is too long\n", pid, name, uid, temp, name); free(temp, M_TEMP); return NULL; } memcpy(temp+n, name, l); n += l; break; case 'P': /* process id */ sprintf(buf, "%u", pid); l = strlen(buf); if ((n + l) > MAXPATHLEN) { log(LOG_ERR, "pid %d (%s), uid (%d): Path `%s%s' is too long\n", pid, name, uid, temp, name); free(temp, M_TEMP); return NULL; } memcpy(temp+n, buf, l); n += l; break; case 'U': /* user id */ sprintf(buf, "%u", uid); l = strlen(buf); if ((n + l) > MAXPATHLEN) { log(LOG_ERR, "pid %d (%s), uid (%d): Path `%s%s' is too long\n", pid, name, uid, temp, name); free(temp, M_TEMP); return NULL; } memcpy(temp+n, buf, l); n += l; break; default: log(LOG_ERR, "Unknown format character %c in `%s'\n", format[i], format); } break; default: temp[n++] = format[i]; } } return temp; } /* * Nonexistent system call-- signal process (may want to handle it). * Flag error in case process won't see signal immediately (blocked or ignored). 
*/ #ifndef _SYS_SYSPROTO_H_ struct nosys_args { int dummy; }; #endif /* ARGSUSED */ int nosys(p, args) struct proc *p; struct nosys_args *args; { psignal(p, SIGSYS); return (EINVAL); +} + +/* + * Send a SIGIO or SIGURG signal to a process or process group using + * stored credentials rather than those of the current process. + */ +void +pgsigio(sigio, signum, checkctty) + struct sigio *sigio; + int signum, checkctty; +{ + if (sigio == NULL) + return; + + if (sigio->sio_pgid > 0) { + if (CANSIGIO(sigio->sio_ruid, sigio->sio_ucred, + sigio->sio_proc)) + psignal(sigio->sio_proc, signum); + } else if (sigio->sio_pgid < 0) { + struct proc *p; + + for (p = sigio->sio_pgrp->pg_members.lh_first; p != NULL; + p = p->p_pglist.le_next) + if (CANSIGIO(sigio->sio_ruid, sigio->sio_ucred, p) && + (checkctty == 0 || (p->p_flag & P_CONTROLT))) + psignal(p, signum); + } } Index: head/sys/kern/subr_log.c =================================================================== --- head/sys/kern/subr_log.c (revision 41085) +++ head/sys/kern/subr_log.c (revision 41086) @@ -1,270 +1,276 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)subr_log.c 8.1 (Berkeley) 6/10/93 - * $Id: subr_log.c,v 1.29 1998/05/28 09:30:20 phk Exp $ + * $Id: subr_log.c,v 1.30 1998/06/07 17:11:38 dfr Exp $ */ /* * Error log buffer for kernel printf's.
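 *
 * Async notification now goes through the generic sigio layer: the
 * owner set via FIOSETOWN (or the deprecated TIOCSPGRP) is recorded
 * with fsetown() and signalled with pgsigio(), rather than this
 * driver caching a raw pid/pgid itself.  Illustrative reader-side
 * setup (a hedged sketch; syslogd is the usual consumer):
 *
 *	fcntl(logfd, F_SETOWN, getpid());	route SIGIO to us
 *	on = 1;
 *	ioctl(logfd, FIOASYNC, &on);		SIGIO on new messages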
*/ #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include +#include #ifdef DEVFS #include #endif /*DEVFS*/ #define LOG_RDPRI (PZERO + 1) #define LOG_ASYNC 0x04 #define LOG_RDWAIT 0x08 static d_open_t logopen; static d_close_t logclose; static d_read_t logread; static d_ioctl_t logioctl; static d_poll_t logpoll; #define CDEV_MAJOR 7 static struct cdevsw log_cdevsw = { logopen, logclose, logread, nowrite, /*7*/ logioctl, nostop, nullreset, nodevtotty,/* klog */ logpoll, nommap, NULL, "log", NULL, -1 }; static struct logsoftc { int sc_state; /* see above for possibilities */ struct selinfo sc_selp; /* process waiting on select call */ - int sc_pgid; /* process/group for async I/O */ + struct sigio *sc_sigio; /* information for SIGIO */ } logsoftc; int log_open; /* also used in log() */ /*ARGSUSED*/ static int logopen(dev, flags, mode, p) dev_t dev; int flags, mode; struct proc *p; { if (log_open) return (EBUSY); log_open = 1; - logsoftc.sc_pgid = p->p_pid; /* signal process only */ + fsetown(p->p_pid, &logsoftc.sc_sigio); /* signal process only */ return (0); } /*ARGSUSED*/ static int logclose(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { log_open = 0; logsoftc.sc_state = 0; + funsetown(logsoftc.sc_sigio); return (0); } /*ARGSUSED*/ static int logread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { register struct msgbuf *mbp = msgbufp; register long l; register int s; int error = 0; s = splhigh(); while (mbp->msg_bufr == mbp->msg_bufx) { if (flag & IO_NDELAY) { splx(s); return (EWOULDBLOCK); } logsoftc.sc_state |= LOG_RDWAIT; if ((error = tsleep((caddr_t)mbp, LOG_RDPRI | PCATCH, "klog", 0))) { splx(s); return (error); } } splx(s); logsoftc.sc_state &= ~LOG_RDWAIT; while (uio->uio_resid > 0) { l = mbp->msg_bufx - mbp->msg_bufr; if (l < 0) l = mbp->msg_size - mbp->msg_bufr; l = min(l, uio->uio_resid); if (l == 0) break; error = uiomove((caddr_t)msgbufp->msg_ptr + mbp->msg_bufr, (int)l, uio); if (error) break; mbp->msg_bufr += l; if (mbp->msg_bufr >= mbp->msg_size) mbp->msg_bufr = 0; } return (error); } /*ARGSUSED*/ static int logpoll(dev, events, p) dev_t dev; int events; struct proc *p; { int s; int revents = 0; s = splhigh(); if (events & (POLLIN | POLLRDNORM)) if (msgbufp->msg_bufr != msgbufp->msg_bufx) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &logsoftc.sc_selp); splx(s); return (revents); } void logwakeup() { struct proc *p; if (!log_open) return; selwakeup(&logsoftc.sc_selp); - if (logsoftc.sc_state & LOG_ASYNC) { - if (logsoftc.sc_pgid < 0) - gsignal(-logsoftc.sc_pgid, SIGIO); - else if ((p = pfind(logsoftc.sc_pgid))) - psignal(p, SIGIO); - } + if ((logsoftc.sc_state & LOG_ASYNC) && logsoftc.sc_sigio != NULL) + pgsigio(logsoftc.sc_sigio, SIGIO, 0); if (logsoftc.sc_state & LOG_RDWAIT) { wakeup((caddr_t)msgbufp); logsoftc.sc_state &= ~LOG_RDWAIT; } } /*ARGSUSED*/ static int logioctl(dev, com, data, flag, p) dev_t dev; u_long com; caddr_t data; int flag; struct proc *p; { long l; int s; switch (com) { /* return number of characters immediately available */ case FIONREAD: s = splhigh(); l = msgbufp->msg_bufx - msgbufp->msg_bufr; splx(s); if (l < 0) l += msgbufp->msg_size; *(int *)data = l; break; case FIONBIO: break; case FIOASYNC: if (*(int *)data) logsoftc.sc_state |= LOG_ASYNC; else logsoftc.sc_state &= ~LOG_ASYNC; break; - case TIOCSPGRP: - logsoftc.sc_pgid = *(int *)data; + case FIOSETOWN: + return (fsetown(*(int *)data, &logsoftc.sc_sigio)); + + case FIOGETOWN: + *(int 
*)data = fgetown(logsoftc.sc_sigio); break; + /* This is deprecated, FIOSETOWN should be used instead. */ + case TIOCSPGRP: + return (fsetown(-(*(int *)data), &logsoftc.sc_sigio)); + + /* This is deprecated, FIOGETOWN should be used instead */ case TIOCGPGRP: - *(int *)data = logsoftc.sc_pgid; + *(int *)data = -fgetown(logsoftc.sc_sigio); break; default: return (ENOTTY); } return (0); } static int log_devsw_installed; #ifdef DEVFS static void *log_devfs_token; #endif static void log_drvinit __P((void *unused)); static void log_drvinit(unused) void *unused; { dev_t dev; if( ! log_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&log_cdevsw,NULL); log_devsw_installed = 1; #ifdef DEVFS log_devfs_token = devfs_add_devswf(&log_cdevsw, 0, DV_CHR, UID_ROOT, GID_WHEEL, 0600, "klog"); #endif } } SYSINIT(logdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,log_drvinit,NULL) Index: head/sys/kern/sys_generic.c =================================================================== --- head/sys/kern/sys_generic.c (revision 41085) +++ head/sys/kern/sys_generic.c (revision 41086) @@ -1,901 +1,870 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 - * $Id: sys_generic.c,v 1.40 1998/08/24 08:39:38 dfr Exp $ + * $Id: sys_generic.c,v 1.41 1998/09/05 14:30:11 bde Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); MALLOC_DEFINE(M_IOV, "iov", "large iov's"); static int pollscan __P((struct proc *, struct pollfd *, int)); static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); /* * Read system call. */ #ifndef _SYS_SYSPROTO_H_ struct read_args { int fd; void *buf; size_t nbyte; }; #endif /* ARGSUSED */ int read(p, uap) struct proc *p; register struct read_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; struct iovec aiov; long cnt, error = 0; #ifdef KTRACE struct iovec ktriov; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FREAD) == 0) return (EBADF); aiov.iov_base = (caddr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; if (uap->nbyte > INT_MAX) return (EINVAL); auio.uio_resid = uap->nbyte; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) ktriov = aiov; #endif cnt = uap->nbyte; if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))) if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; cnt -= auio.uio_resid; #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO) && error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error); #endif p->p_retval[0] = cnt; return (error); } /* * Scatter read system call. 
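 *
 * The user iovec array is validated before it is trusted: iovlen is
 * computed up front but, as noted below, not used until iovcnt has
 * been bounded by UIO_MAXIOV, and the per-segment lengths are then
 * summed with an overflow check against INT_MAX.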
*/ #ifndef _SYS_SYSPROTO_H_ struct readv_args { int fd; struct iovec *iovp; u_int iovcnt; }; #endif int readv(p, uap) struct proc *p; register struct readv_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; register struct iovec *iov; struct iovec *needfree; struct iovec aiov[UIO_SMALLIOV]; long i, cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FREAD) == 0) return (EBADF); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) return (EINVAL); MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); needfree = iov; } else { iov = aiov; needfree = NULL; } auio.uio_iov = iov; auio.uio_iovcnt = uap->iovcnt; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_offset = -1; if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) goto done; auio.uio_resid = 0; for (i = 0; i < uap->iovcnt; i++) { if (iov->iov_len > INT_MAX - auio.uio_resid) { error = EINVAL; goto done; } auio.uio_resid += iov->iov_len; iov++; } #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif cnt = auio.uio_resid; if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred))) if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; cnt -= auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov, cnt, error); FREE(ktriov, M_TEMP); } #endif p->p_retval[0] = cnt; done: if (needfree) FREE(needfree, M_IOV); return (error); } /* * Write system call */ #ifndef _SYS_SYSPROTO_H_ struct write_args { int fd; const void *buf; size_t nbyte; }; #endif int write(p, uap) struct proc *p; register struct write_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; struct iovec aiov; long cnt, error = 0; #ifdef KTRACE struct iovec ktriov; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FWRITE) == 0) return (EBADF); aiov.iov_base = (caddr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = -1; if (uap->nbyte > INT_MAX) return (EINVAL); auio.uio_resid = uap->nbyte; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) ktriov = aiov; #endif cnt = uap->nbyte; if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } cnt -= auio.uio_resid; #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO) && error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktriov, cnt, error); #endif p->p_retval[0] = cnt; return (error); } /* * Gather write system call */ #ifndef _SYS_SYSPROTO_H_ struct writev_args { int fd; struct iovec *iovp; u_int iovcnt; }; #endif int writev(p, uap) struct proc *p; register struct writev_args *uap; { register struct file *fp; register struct filedesc *fdp = p->p_fd; struct uio auio; register struct iovec *iov; struct iovec *needfree; struct iovec aiov[UIO_SMALLIOV]; long i, cnt, 
error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif if (((u_int)uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL || (fp->f_flag & FWRITE) == 0) return (EBADF); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) return (EINVAL); MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); needfree = iov; } else { iov = aiov; needfree = NULL; } auio.uio_iov = iov; auio.uio_iovcnt = uap->iovcnt; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_offset = -1; if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) goto done; auio.uio_resid = 0; for (i = 0; i < uap->iovcnt; i++) { if (iov->iov_len > INT_MAX - auio.uio_resid) { error = EINVAL; goto done; } auio.uio_resid += iov->iov_len; iov++; } #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif cnt = auio.uio_resid; if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } cnt -= auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, ktriov, cnt, error); FREE(ktriov, M_TEMP); } #endif p->p_retval[0] = cnt; done: if (needfree) FREE(needfree, M_IOV); return (error); } /* * Ioctl system call */ #ifndef _SYS_SYSPROTO_H_ struct ioctl_args { int fd; u_long com; caddr_t data; }; #endif /* ARGSUSED */ int ioctl(p, uap) struct proc *p; register struct ioctl_args *uap; { register struct file *fp; register struct filedesc *fdp; register u_long com; int error; register u_int size; caddr_t data, memp; int tmp; #define STK_PARAMS 128 char stkbuf[STK_PARAMS]; fdp = p->p_fd; if ((u_int)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if ((fp->f_flag & (FREAD | FWRITE)) == 0) return (EBADF); switch (com = uap->com) { case FIONCLEX: fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; return (0); case FIOCLEX: fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; return (0); } /* * Interpret high order word to find amount of data to be * copied to/from the user's address space. */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) return (ENOTTY); memp = NULL; if (size > sizeof (stkbuf)) { memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); data = memp; } else data = stkbuf; if (com&IOC_IN) { if (size) { error = copyin(uap->data, data, (u_int)size); if (error) { if (memp) free(memp, M_IOCTLOPS); return (error); } } else *(caddr_t *)data = uap->data; } else if ((com&IOC_OUT) && size) /* * Zero the buffer so the user always * gets back something deterministic. 
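 * Otherwise stale kernel stack or malloc contents could leak to
 * userland through an IOC_OUT command whose handler fills in fewer
 * than `size' bytes.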
*/ bzero(data, size); else if (com&IOC_VOID) *(caddr_t *)data = uap->data; switch (com) { case FIONBIO: if ((tmp = *(int *)data)) fp->f_flag |= FNONBLOCK; else fp->f_flag &= ~FNONBLOCK; error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p); break; case FIOASYNC: if ((tmp = *(int *)data)) fp->f_flag |= FASYNC; else fp->f_flag &= ~FASYNC; error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p); - break; - - case FIOSETOWN: - tmp = *(int *)data; - if (fp->f_type == DTYPE_SOCKET) { - ((struct socket *)fp->f_data)->so_pgid = tmp; - error = 0; - break; - } - if (tmp <= 0) { - tmp = -tmp; - } else { - struct proc *p1 = pfind(tmp); - if (p1 == 0) { - error = ESRCH; - break; - } - tmp = p1->p_pgrp->pg_id; - } - error = (*fp->f_ops->fo_ioctl) - (fp, (int)TIOCSPGRP, (caddr_t)&tmp, p); - break; - - case FIOGETOWN: - if (fp->f_type == DTYPE_SOCKET) { - error = 0; - *(int *)data = ((struct socket *)fp->f_data)->so_pgid; - break; - } - error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p); - *(int *)data = -*(int *)data; break; default: error = (*fp->f_ops->fo_ioctl)(fp, com, data, p); /* * Copy any data to user, size was * already set and checked above. */ if (error == 0 && (com&IOC_OUT) && size) error = copyout(data, uap->data, (u_int)size); break; } if (memp) free(memp, M_IOCTLOPS); return (error); } static int nselcoll; int selwait; /* * Select system call. */ #ifndef _SYS_SYSPROTO_H_ struct select_args { int nd; fd_set *in, *ou, *ex; struct timeval *tv; }; #endif int select(p, uap) register struct proc *p; register struct select_args *uap; { /* * The magic 2048 here is chosen to be just enough for FD_SETSIZE * infds with the new FD_SETSIZE of 1024, and more than enough for * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE * of 256. */ fd_mask s_selbits[howmany(2048, NFDBITS)]; fd_mask *ibits[3], *obits[3], *selbits, *sbp; struct timeval atv, rtv, ttv; int s, ncoll, error, timo; u_int nbufbytes, ncpbytes, nfdbits; if (uap->nd < 0) return (EINVAL); if (uap->nd > p->p_fd->fd_nfiles) uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ /* * Allocate just enough bits for the non-null fd_sets. Use the * preallocated auto buffer if possible. */ nfdbits = roundup(uap->nd, NFDBITS); ncpbytes = nfdbits / NBBY; nbufbytes = 0; if (uap->in != NULL) nbufbytes += 2 * ncpbytes; if (uap->ou != NULL) nbufbytes += 2 * ncpbytes; if (uap->ex != NULL) nbufbytes += 2 * ncpbytes; if (nbufbytes <= sizeof s_selbits) selbits = &s_selbits[0]; else selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); /* * Assign pointers into the bit buffers and fetch the input bits. * Put the output buffers together so that they can be bzeroed * together. 
*/ sbp = selbits; #define getbits(name, x) \ do { \ if (uap->name == NULL) \ ibits[x] = NULL; \ else { \ ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ obits[x] = sbp; \ sbp += ncpbytes / sizeof *sbp; \ error = copyin(uap->name, ibits[x], ncpbytes); \ if (error != 0) \ goto done; \ } \ } while (0) getbits(in, 0); getbits(ou, 1); getbits(ex, 2); #undef getbits if (nbufbytes != 0) bzero(selbits, nbufbytes / 2); if (uap->tv) { error = copyin((caddr_t)uap->tv, (caddr_t)&atv, sizeof (atv)); if (error) goto done; if (itimerfix(&atv)) { error = EINVAL; goto done; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); } else atv.tv_sec = 0; timo = 0; retry: ncoll = nselcoll; p->p_flag |= P_SELECT; error = selscan(p, ibits, obits, uap->nd); if (error || p->p_retval[0]) goto done; if (atv.tv_sec) { getmicrouptime(&rtv); if (timevalcmp(&rtv, &atv, >=)) goto done; ttv = atv; timevalsub(&ttv, &rtv); timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } s = splhigh(); if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { splx(s); goto retry; } p->p_flag &= ~P_SELECT; error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); splx(s); if (error == 0) goto retry; done: p->p_flag &= ~P_SELECT; /* select is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; #define putbits(name, x) \ if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ error = error2; if (error == 0) { int error2; putbits(in, 0); putbits(ou, 1); putbits(ex, 2); #undef putbits } if (selbits != &s_selbits[0]) free(selbits, M_SELECT); return (error); } static int selscan(p, ibits, obits, nfd) struct proc *p; fd_mask **ibits, **obits; int nfd; { register struct filedesc *fdp = p->p_fd; register int msk, i, j, fd; register fd_mask bits; struct file *fp; int n = 0; /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; for (msk = 0; msk < 3; msk++) { if (ibits[msk] == NULL) continue; for (i = 0; i < nfd; i += NFDBITS) { bits = ibits[msk][i/NFDBITS]; while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); fp = fdp->fd_ofiles[fd]; if (fp == NULL) return (EBADF); if ((*fp->f_ops->fo_poll)(fp, flag[msk], fp->f_cred, p)) { obits[msk][(fd)/NFDBITS] |= (1 << ((fd) % NFDBITS)); n++; } } } } p->p_retval[0] = n; return (0); } /* * Poll system call. 
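 *
 * Unlike select() above, which fails the whole call with EBADF,
 * pollscan() below flags a bad descriptor per-entry by setting
 * POLLNVAL in that entry's revents.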
*/ #ifndef _SYS_SYSPROTO_H_ struct poll_args { struct pollfd *fds; u_int nfds; int timeout; }; #endif int poll(p, uap) register struct proc *p; register struct poll_args *uap; { caddr_t bits; char smallbits[32 * sizeof(struct pollfd)]; struct timeval atv, rtv, ttv; int s, ncoll, error = 0, timo; size_t ni; if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { /* forgiving; slightly wrong */ SCARG(uap, nfds) = p->p_fd->fd_nfiles; } ni = SCARG(uap, nfds) * sizeof(struct pollfd); if (ni > sizeof(smallbits)) bits = malloc(ni, M_TEMP, M_WAITOK); else bits = smallbits; error = copyin(SCARG(uap, fds), bits, ni); if (error) goto done; if (SCARG(uap, timeout) != INFTIM) { atv.tv_sec = SCARG(uap, timeout) / 1000; atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; if (itimerfix(&atv)) { error = EINVAL; goto done; } getmicrouptime(&rtv); timevaladd(&atv, &rtv); } else atv.tv_sec = 0; timo = 0; retry: ncoll = nselcoll; p->p_flag |= P_SELECT; error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); if (error || p->p_retval[0]) goto done; if (atv.tv_sec) { getmicrouptime(&rtv); if (timevalcmp(&rtv, &atv, >=)) goto done; ttv = atv; timevalsub(&ttv, &rtv); timo = ttv.tv_sec > 24 * 60 * 60 ? 24 * 60 * 60 * hz : tvtohz(&ttv); } s = splhigh(); if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { splx(s); goto retry; } p->p_flag &= ~P_SELECT; error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); splx(s); if (error == 0) goto retry; done: p->p_flag &= ~P_SELECT; /* poll is not restarted after signals... */ if (error == ERESTART) error = EINTR; if (error == EWOULDBLOCK) error = 0; if (error == 0) { error = copyout(bits, SCARG(uap, fds), ni); if (error) goto out; } out: if (ni > sizeof(smallbits)) free(bits, M_TEMP); return (error); } static int pollscan(p, fds, nfd) struct proc *p; struct pollfd *fds; int nfd; { register struct filedesc *fdp = p->p_fd; int i; struct file *fp; int n = 0; for (i = 0; i < nfd; i++, fds++) { if ((u_int)fds->fd >= fdp->fd_nfiles) { fds->revents = POLLNVAL; n++; } else { fp = fdp->fd_ofiles[fds->fd]; if (fp == 0) { fds->revents = POLLNVAL; n++; } else { /* * Note: backend also returns POLLHUP and * POLLERR if appropriate. */ fds->revents = (*fp->f_ops->fo_poll)(fp, fds->events, fp->f_cred, p); if (fds->revents != 0) n++; } } } p->p_retval[0] = n; return (0); } /* * OpenBSD poll system call. * XXX this isn't quite a true representation.. OpenBSD uses select ops. */ #ifndef _SYS_SYSPROTO_H_ struct openbsd_poll_args { struct pollfd *fds; u_int nfds; int timeout; }; #endif int openbsd_poll(p, uap) register struct proc *p; register struct openbsd_poll_args *uap; { return (poll(p, (struct poll_args *)uap)); } /*ARGSUSED*/ int seltrue(dev, events, p) dev_t dev; int events; struct proc *p; { return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); } /* * Record a select request. */ void selrecord(selector, sip) struct proc *selector; struct selinfo *sip; { struct proc *p; pid_t mypid; mypid = selector->p_pid; if (sip->si_pid == mypid) return; if (sip->si_pid && (p = pfind(sip->si_pid)) && p->p_wchan == (caddr_t)&selwait) sip->si_flags |= SI_COLL; else sip->si_pid = mypid; } /* * Do a wakeup when a selectable event occurs. 
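 *
 * If more than one process selected on the same descriptor, the
 * selinfo is marked SI_COLL; in that case nselcoll is bumped and
 * everything sleeping on selwait is woken, and the processes that
 * did not win the event simply rescan.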
*/ void selwakeup(sip) register struct selinfo *sip; { register struct proc *p; int s; if (sip->si_pid == 0) return; if (sip->si_flags & SI_COLL) { nselcoll++; sip->si_flags &= ~SI_COLL; wakeup((caddr_t)&selwait); } p = pfind(sip->si_pid); sip->si_pid = 0; if (p != NULL) { s = splhigh(); if (p->p_wchan == (caddr_t)&selwait) { if (p->p_stat == SSLEEP) setrunnable(p); else unsleep(p); } else if (p->p_flag & P_SELECT) p->p_flag &= ~P_SELECT; splx(s); } } Index: head/sys/kern/sys_pipe.c =================================================================== --- head/sys/kern/sys_pipe.c (revision 41085) +++ head/sys/kern/sys_pipe.c (revision 41086) @@ -1,1100 +1,1104 @@ /* * Copyright (c) 1996 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: sys_pipe.c,v 1.43 1998/10/13 08:24:40 dg Exp $ + * $Id: sys_pipe.c,v 1.44 1998/10/28 13:36:58 dg Exp $ */ /* * This file contains a high-performance replacement for the socket-based * pipes scheme originally used in FreeBSD/4.4Lite. It does not support * all features of sockets, but does do everything that pipes normally * do. */ /* * This code has two modes of operation, a small write mode and a large * write mode. The small write mode acts like conventional pipes with * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and * the receiving process can copy it directly from the pages in the sending * process. * * If the sending process receives a signal, it is possible that it will * go away, and certainly its address space can change, because control * is returned back to the user-mode side. In that case, the pipe code * arranges to copy the buffer supplied by the user process, to a pageable * kernel buffer, and the receiving process will grab the data from the * pageable kernel buffer. Since signals don't happen all that often, * the copy operation is normally eliminated. * * The constant PIPE_MINDIRECT is chosen to make sure that buffering will * happen for small transfers so that the system will not spend all of * its time context switching. PIPE_SIZE is constrained by the * amount of kernel virtual memory. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Use this define if you want to disable *fancy* VM things. Expect an * approx 30% decrease in transfer rate. This could be useful for * NetBSD or OpenBSD. 
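 * Defining it trades the page-loaning direct-write path below for
 * plain copies through the pageable kernel buffer.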
*/ /* #define PIPE_NODIRECT */ /* * interfaces to the outside world */ static int pipe_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int pipe_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int pipe_close __P((struct file *fp, struct proc *p)); static int pipe_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); static struct fileops pipeops = { pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_close }; /* * Default pipe buffer size(s), this can be kind-of large now because pipe * space is pageable. The pipe code will try to maintain locality of * reference for performance reasons, so small amounts of outstanding I/O * will not wipe the cache. */ #define MINPIPESIZE (PIPE_SIZE/3) #define MAXPIPESIZE (2*PIPE_SIZE/3) /* * Maximum amount of kva for pipes -- this is kind-of a soft limit, but * is there so that on large systems, we don't exhaust it. */ #define MAXPIPEKVA (8*1024*1024) /* * Limit for direct transfers, we cannot, of course limit * the amount of kva for pipes in general though. */ #define LIMITPIPEKVA (16*1024*1024) /* * Limit the number of "big" pipes */ #define LIMITBIGPIPES 32 static int nbigpipe; static int amountpipekva; static void pipeclose __P((struct pipe *cpipe)); static void pipeinit __P((struct pipe *cpipe)); static __inline int pipelock __P((struct pipe *cpipe, int catch)); static __inline void pipeunlock __P((struct pipe *cpipe)); static __inline void pipeselwakeup __P((struct pipe *cpipe)); #ifndef PIPE_NODIRECT static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); static void pipe_destroy_write_buffer __P((struct pipe *wpipe)); static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); static void pipe_clone_write_buffer __P((struct pipe *wpipe)); #endif static void pipespace __P((struct pipe *cpipe)); static vm_zone_t pipe_zone; /* * The pipe system call for the DTYPE_PIPE type of pipes */ /* ARGSUSED */ int pipe(p, uap) struct proc *p; struct pipe_args /* { int dummy; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct file *rf, *wf; struct pipe *rpipe, *wpipe; int fd, error; if (pipe_zone == NULL) pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); rpipe = zalloc( pipe_zone); pipeinit(rpipe); rpipe->pipe_state |= PIPE_DIRECTOK; wpipe = zalloc( pipe_zone); pipeinit(wpipe); wpipe->pipe_state |= PIPE_DIRECTOK; error = falloc(p, &rf, &fd); if (error) goto free2; p->p_retval[0] = fd; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; rf->f_ops = &pipeops; rf->f_data = (caddr_t)rpipe; error = falloc(p, &wf, &fd); if (error) goto free3; wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_PIPE; wf->f_ops = &pipeops; wf->f_data = (caddr_t)wpipe; p->p_retval[1] = fd; rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; return (0); free3: ffree(rf); fdp->fd_ofiles[p->p_retval[0]] = 0; free2: (void)pipeclose(wpipe); (void)pipeclose(rpipe); return (error); } /* * Allocate kva for pipe circular buffer, the space is pageable */ static void pipespace(cpipe) struct pipe *cpipe; { int npages, error; npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; /* * Create an object, I don't like the idea of paging to/from * kernel_object. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
*/ cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); /* * Insert the object into the kernel map, and allocate kva for it. * The map entry is, by default, pageable. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. */ error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, (vm_offset_t *) &cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size, 1, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); amountpipekva += cpipe->pipe_buffer.size; } /* * initialize and allocate VM and memory for pipe */ static void pipeinit(cpipe) struct pipe *cpipe; { cpipe->pipe_buffer.in = 0; cpipe->pipe_buffer.out = 0; cpipe->pipe_buffer.cnt = 0; cpipe->pipe_buffer.size = PIPE_SIZE; /* Buffer kva gets dynamically allocated */ cpipe->pipe_buffer.buffer = NULL; /* cpipe->pipe_buffer.object = invalid */ cpipe->pipe_state = 0; cpipe->pipe_peer = NULL; cpipe->pipe_busy = 0; getnanotime(&cpipe->pipe_ctime); cpipe->pipe_atime = cpipe->pipe_ctime; cpipe->pipe_mtime = cpipe->pipe_ctime; bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); - cpipe->pipe_pgid = NO_PID; #ifndef PIPE_NODIRECT /* * pipe data structure initializations to support direct pipe I/O */ cpipe->pipe_map.cnt = 0; cpipe->pipe_map.kva = 0; cpipe->pipe_map.pos = 0; cpipe->pipe_map.npages = 0; /* cpipe->pipe_map.ms[] = invalid */ #endif } /* * lock a pipe for I/O, blocking other access */ static __inline int pipelock(cpipe, catch) struct pipe *cpipe; int catch; { int error; while (cpipe->pipe_state & PIPE_LOCK) { cpipe->pipe_state |= PIPE_LWANT; if (error = tsleep( cpipe, catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) { return error; } } cpipe->pipe_state |= PIPE_LOCK; return 0; } /* * unlock a pipe I/O lock */ static __inline void pipeunlock(cpipe) struct pipe *cpipe; { cpipe->pipe_state &= ~PIPE_LOCK; if (cpipe->pipe_state & PIPE_LWANT) { cpipe->pipe_state &= ~PIPE_LWANT; wakeup(cpipe); } } static __inline void pipeselwakeup(cpipe) struct pipe *cpipe; { struct proc *p; if (cpipe->pipe_state & PIPE_SEL) { cpipe->pipe_state &= ~PIPE_SEL; selwakeup(&cpipe->pipe_sel); } - if (cpipe->pipe_state & PIPE_ASYNC) { - if (cpipe->pipe_pgid < 0) - gsignal(-cpipe->pipe_pgid, SIGIO); - else if ((p = pfind(cpipe->pipe_pgid)) != NULL) - psignal(p, SIGIO); - } + if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) + pgsigio(cpipe->pipe_sigio, SIGIO, 0); } /* ARGSUSED */ static int pipe_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct pipe *rpipe = (struct pipe *) fp->f_data; int error = 0; int nread = 0; u_int size; ++rpipe->pipe_busy; while (uio->uio_resid) { /* * normal pipe buffer receive */ if (rpipe->pipe_buffer.cnt > 0) { size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; if (size > rpipe->pipe_buffer.cnt) size = rpipe->pipe_buffer.cnt; if (size > (u_int) uio->uio_resid) size = (u_int) uio->uio_resid; if ((error = pipelock(rpipe,1)) == 0) { error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], size, uio); pipeunlock(rpipe); } if (error) { break; } rpipe->pipe_buffer.out += size; if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) rpipe->pipe_buffer.out = 0; rpipe->pipe_buffer.cnt -= size; nread += size; #ifndef PIPE_NODIRECT /* * Direct copy, bypassing a kernel buffer. 
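 * Illustrative numbers (not taken from the code): suppose the writer
 * mapped 16384 bytes with pipe_map.pos = 0x123 (the user buffer's
 * offset within its first page). A reader asking for 4096 bytes copies
 * them straight from pipe_map.kva + 0x123, leaving cnt = 12288 and
 * pos = 0x1123; only when cnt finally reaches 0 is PIPE_DIRECTW
 * cleared and the waiting writer woken.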
*/ } else if ((size = rpipe->pipe_map.cnt) && (rpipe->pipe_state & PIPE_DIRECTW)) { caddr_t va; if (size > (u_int) uio->uio_resid) size = (u_int) uio->uio_resid; if ((error = pipelock(rpipe,1)) == 0) { va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; error = uiomove(va, size, uio); pipeunlock(rpipe); } if (error) break; nread += size; rpipe->pipe_map.pos += size; rpipe->pipe_map.cnt -= size; if (rpipe->pipe_map.cnt == 0) { rpipe->pipe_state &= ~PIPE_DIRECTW; wakeup(rpipe); } #endif } else { /* * detect EOF condition */ if (rpipe->pipe_state & PIPE_EOF) { /* XXX error = ? */ break; } /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } if (nread > 0) break; if (fp->f_flag & FNONBLOCK) { error = EAGAIN; break; } /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. */ if ((error = pipelock(rpipe,1)) == 0) { if (rpipe->pipe_buffer.cnt == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; } pipeunlock(rpipe); } else { break; } if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } rpipe->pipe_state |= PIPE_WANTR; if (error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) { break; } } } if (error == 0) getnanotime(&rpipe->pipe_atime); --rpipe->pipe_busy; if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); wakeup(rpipe); } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. */ if (rpipe->pipe_buffer.cnt == 0) { if ((error == 0) && (error = pipelock(rpipe,1)) == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; pipeunlock(rpipe); } } /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } } if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) pipeselwakeup(rpipe); return error; } #ifndef PIPE_NODIRECT /* * Map the sending processes' buffer into kernel space and wire it. * This is similar to a physical write operation. */ static int pipe_build_write_buffer(wpipe, uio) struct pipe *wpipe; struct uio *uio; { u_int size; int i; vm_offset_t addr, endaddr, paddr; size = (u_int) uio->uio_iov->iov_len; if (size > wpipe->pipe_buffer.size) size = wpipe->pipe_buffer.size; endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); addr < endaddr; addr += PAGE_SIZE, i+=1) { vm_page_t m; vm_fault_quick( (caddr_t) addr, VM_PROT_READ); paddr = pmap_kextract(addr); if (!paddr) { int j; for(j=0;jpipe_map.ms[j], 1); return EFAULT; } m = PHYS_TO_VM_PAGE(paddr); vm_page_wire(m); wpipe->pipe_map.ms[i] = m; } /* * set up the control block */ wpipe->pipe_map.npages = i; wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; wpipe->pipe_map.cnt = size; /* * and map the buffer */ if (wpipe->pipe_map.kva == 0) { /* * We need to allocate space for an extra page because the * address range might (will) span pages at times. 
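 * For example (illustrative numbers, assuming 4K pages): a 16384-byte
 * user buffer starting at 0x1003 touches trunc_page(0x1003) = 0x1000
 * through round_page(0x1003 + 16384) = 0x6000, i.e. five pages even
 * though 16384 bytes is only four pages' worth -- hence the extra
 * PAGE_SIZE in the kva allocation below.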
*/ wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, wpipe->pipe_buffer.size + PAGE_SIZE); amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; } pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, wpipe->pipe_map.npages); /* * and update the uio data */ uio->uio_iov->iov_len -= size; uio->uio_iov->iov_base += size; if (uio->uio_iov->iov_len == 0) uio->uio_iov++; uio->uio_resid -= size; uio->uio_offset += size; return 0; } /* * unmap and unwire the process buffer */ static void pipe_destroy_write_buffer(wpipe) struct pipe *wpipe; { int i; if (wpipe->pipe_map.kva) { pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); if (amountpipekva > MAXPIPEKVA) { vm_offset_t kva = wpipe->pipe_map.kva; wpipe->pipe_map.kva = 0; kmem_free(kernel_map, kva, wpipe->pipe_buffer.size + PAGE_SIZE); amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; } } for (i=0;ipipe_map.npages;i++) vm_page_unwire(wpipe->pipe_map.ms[i], 1); } /* * In the case of a signal, the writing process might go away. This * code copies the data into the circular buffer so that the source * pages can be freed without loss of data. */ static void pipe_clone_write_buffer(wpipe) struct pipe *wpipe; { int size; int pos; size = wpipe->pipe_map.cnt; pos = wpipe->pipe_map.pos; bcopy((caddr_t) wpipe->pipe_map.kva+pos, (caddr_t) wpipe->pipe_buffer.buffer, size); wpipe->pipe_buffer.in = size; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = size; wpipe->pipe_state &= ~PIPE_DIRECTW; pipe_destroy_write_buffer(wpipe); } /* * This implements the pipe buffer write mechanism. Note that only * a direct write OR a normal pipe write can be pending at any given time. * If there are any characters in the pipe buffer, the direct write will * be deferred until the receiving process grabs all of the bytes from * the pipe buffer. Then the direct mapping write is set-up. */ static int pipe_direct_write(wpipe, uio) struct pipe *wpipe; struct uio *uio; { int error; retry: while (wpipe->pipe_state & PIPE_DIRECTW) { if ( wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } wpipe->pipe_state |= PIPE_WANTW; error = tsleep(wpipe, PRIBIO|PCATCH, "pipdww", 0); if (error) goto error1; if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; goto error1; } } wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ if (wpipe->pipe_buffer.cnt > 0) { if ( wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } wpipe->pipe_state |= PIPE_WANTW; error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwc", 0); if (error) goto error1; if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; goto error1; } goto retry; } wpipe->pipe_state |= PIPE_DIRECTW; error = pipe_build_write_buffer(wpipe, uio); if (error) { wpipe->pipe_state &= ~PIPE_DIRECTW; goto error1; } error = 0; while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { if (wpipe->pipe_state & PIPE_EOF) { pipelock(wpipe, 0); pipe_destroy_write_buffer(wpipe); pipeunlock(wpipe); pipeselwakeup(wpipe); error = EPIPE; goto error1; } if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } pipeselwakeup(wpipe); error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); } pipelock(wpipe,0); if (wpipe->pipe_state & PIPE_DIRECTW) { /* * this bit of trickery substitutes a kernel buffer for * the process that might be going away. 
*/ pipe_clone_write_buffer(wpipe); } else { pipe_destroy_write_buffer(wpipe); } pipeunlock(wpipe); return error; error1: wakeup(wpipe); return error; } #endif static int pipe_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { int error = 0; int orig_resid; struct pipe *wpipe, *rpipe; rpipe = (struct pipe *) fp->f_data; wpipe = rpipe->pipe_peer; /* * detect loss of pipe read side, issue SIGPIPE if lost. */ if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { return EPIPE; } /* * If it is advantageous to resize the pipe buffer, do * so. */ if ((uio->uio_resid > PIPE_SIZE) && (nbigpipe < LIMITBIGPIPES) && (wpipe->pipe_state & PIPE_DIRECTW) == 0 && (wpipe->pipe_buffer.size <= PIPE_SIZE) && (wpipe->pipe_buffer.cnt == 0)) { if (wpipe->pipe_buffer.buffer) { amountpipekva -= wpipe->pipe_buffer.size; kmem_free(kernel_map, (vm_offset_t)wpipe->pipe_buffer.buffer, wpipe->pipe_buffer.size); } #ifndef PIPE_NODIRECT if (wpipe->pipe_map.kva) { amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; kmem_free(kernel_map, wpipe->pipe_map.kva, wpipe->pipe_buffer.size + PAGE_SIZE); } #endif wpipe->pipe_buffer.in = 0; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = 0; wpipe->pipe_buffer.size = BIG_PIPE_SIZE; wpipe->pipe_buffer.buffer = NULL; ++nbigpipe; #ifndef PIPE_NODIRECT wpipe->pipe_map.cnt = 0; wpipe->pipe_map.kva = 0; wpipe->pipe_map.pos = 0; wpipe->pipe_map.npages = 0; #endif } if( wpipe->pipe_buffer.buffer == NULL) { if ((error = pipelock(wpipe,1)) == 0) { pipespace(wpipe); pipeunlock(wpipe); } else { return error; } } ++wpipe->pipe_busy; orig_resid = uio->uio_resid; while (uio->uio_resid) { int space; #ifndef PIPE_NODIRECT /* * If the transfer is large, we can gain performance if * we do process-to-process copies directly. * If the write is non-blocking, we don't use the * direct write mechanism. */ if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { error = pipe_direct_write( wpipe, uio); if (error) { break; } continue; } #endif /* * Pipe buffered writes cannot be coincidental with * direct writes. We wait until the currently executing * direct write is completed before we start filling the * pipe buffer. */ retrywrite: while (wpipe->pipe_state & PIPE_DIRECTW) { if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } error = tsleep(wpipe, PRIBIO|PCATCH, "pipbww", 0); if (error) break; } space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; /* Writes of size <= PIPE_BUF must be atomic. */ /* XXX perhaps they need to be contiguous to be atomic? */ if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) space = 0; if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { /* * This set the maximum transfer as a segment of * the buffer. */ int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in; /* * space is the size left in the buffer */ if (size > space) size = space; /* * now limit it to the size of the uio transfer */ if (size > uio->uio_resid) size = uio->uio_resid; if ((error = pipelock(wpipe,1)) == 0) { /* * It is possible for a direct write to * slip in on us... handle it here... 
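 * (The race being handled: PIPE_DIRECTW was clear when the retrywrite
 * loop above was left, but pipelock() may have slept, and another
 * process writing the same pipe can have started a direct transfer in
 * the meantime -- so the state is re-checked under the pipe lock.)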
*/ if (wpipe->pipe_state & PIPE_DIRECTW) { pipeunlock(wpipe); goto retrywrite; } error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], size, uio); pipeunlock(wpipe); } if (error) break; wpipe->pipe_buffer.in += size; if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size) wpipe->pipe_buffer.in = 0; wpipe->pipe_buffer.cnt += size; } else { /* * If the "read-side" has been blocked, wake it up now. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } /* * don't block on non-blocking I/O */ if (fp->f_flag & FNONBLOCK) { error = EAGAIN; break; } /* * We have no more space and have something to offer, * wake up select/poll. */ pipeselwakeup(wpipe); wpipe->pipe_state |= PIPE_WANTW; if (error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) { break; } /* * If read side wants to go away, we just issue a signal * to ourselves. */ if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; break; } } } --wpipe->pipe_busy; if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); wakeup(wpipe); } else if (wpipe->pipe_buffer.cnt > 0) { /* * If we have put any characters in the buffer, we wake up * the reader. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } } /* * Don't return EPIPE if I/O was successful */ if ((wpipe->pipe_buffer.cnt == 0) && (uio->uio_resid == 0) && (error == EPIPE)) error = 0; if (error == 0) getnanotime(&wpipe->pipe_mtime); /* * We have something to offer, * wake up select/poll. */ if (wpipe->pipe_buffer.cnt) pipeselwakeup(wpipe); return error; } /* * we implement a very minimal set of ioctls for compatibility with sockets. */ int pipe_ioctl(fp, cmd, data, p) struct file *fp; u_long cmd; register caddr_t data; struct proc *p; { register struct pipe *mpipe = (struct pipe *)fp->f_data; switch (cmd) { case FIONBIO: return (0); case FIOASYNC: if (*(int *)data) { mpipe->pipe_state |= PIPE_ASYNC; } else { mpipe->pipe_state &= ~PIPE_ASYNC; } return (0); case FIONREAD: if (mpipe->pipe_state & PIPE_DIRECTW) *(int *)data = mpipe->pipe_map.cnt; else *(int *)data = mpipe->pipe_buffer.cnt; return (0); - case TIOCSPGRP: - mpipe->pipe_pgid = *(int *)data; + case FIOSETOWN: + return (fsetown(*(int *)data, &mpipe->pipe_sigio)); + + case FIOGETOWN: + *(int *)data = fgetown(mpipe->pipe_sigio); return (0); + /* This is deprecated, FIOSETOWN should be used instead. */ + case TIOCSPGRP: + return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); + + /* This is deprecated, FIOGETOWN should be used instead. 
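 * The sign convention, as an illustrative userland sketch (not part of
 * this file): a positive FIOSETOWN argument names a process and a
 * negative one a process group, while the old TIOCSPGRP interface took
 * a plain process group id -- hence the negation in the translations
 * here:
 *
 *	int owner;
 *
 *	owner = pid;			(SIGIO to a single process)
 *	ioctl(fd, FIOSETOWN, &owner);
 *	owner = -pgrp;			(SIGIO to a whole process group)
 *	ioctl(fd, FIOSETOWN, &owner);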
*/ case TIOCGPGRP: - *(int *)data = mpipe->pipe_pgid; + *(int *)data = -fgetown(mpipe->pipe_sigio); return (0); } return (ENOTTY); } int pipe_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { register struct pipe *rpipe = (struct pipe *)fp->f_data; struct pipe *wpipe; int revents = 0; wpipe = rpipe->pipe_peer; if (events & (POLLIN | POLLRDNORM)) if ((rpipe->pipe_state & PIPE_DIRECTW) || (rpipe->pipe_buffer.cnt > 0) || (rpipe->pipe_state & PIPE_EOF)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || ((wpipe->pipe_state & PIPE_DIRECTW) == 0) && (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF) revents |= events & (POLLOUT | POLLWRNORM); if ((rpipe->pipe_state & PIPE_EOF) || (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) revents |= POLLHUP; if (revents == 0) { if (events & (POLLIN | POLLRDNORM)) { selrecord(p, &rpipe->pipe_sel); rpipe->pipe_state |= PIPE_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(p, &wpipe->pipe_sel); wpipe->pipe_state |= PIPE_SEL; } } return (revents); } int pipe_stat(pipe, ub) register struct pipe *pipe; register struct stat *ub; { bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = pipe->pipe_buffer.size; ub->st_size = pipe->pipe_buffer.cnt; ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; ub->st_atimespec = pipe->pipe_atime; ub->st_mtimespec = pipe->pipe_mtime; ub->st_ctimespec = pipe->pipe_ctime; /* * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev, * st_flags, st_gen. * XXX (st_dev, st_ino) should be unique. */ return 0; } /* ARGSUSED */ static int pipe_close(fp, p) struct file *fp; struct proc *p; { struct pipe *cpipe = (struct pipe *)fp->f_data; + funsetown(cpipe->pipe_sigio); pipeclose(cpipe); fp->f_data = NULL; return 0; } /* * shutdown the pipe */ static void pipeclose(cpipe) struct pipe *cpipe; { struct pipe *ppipe; if (cpipe) { pipeselwakeup(cpipe); /* * If the other side is blocked, wake it up saying that * we want to close it down. */ while (cpipe->pipe_busy) { wakeup(cpipe); cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; tsleep(cpipe, PRIBIO, "pipecl", 0); } /* * Disconnect from peer */ if (ppipe = cpipe->pipe_peer) { pipeselwakeup(ppipe); ppipe->pipe_state |= PIPE_EOF; wakeup(ppipe); ppipe->pipe_peer = NULL; } /* * free resources */ if (cpipe->pipe_buffer.buffer) { if (cpipe->pipe_buffer.size > PIPE_SIZE) --nbigpipe; amountpipekva -= cpipe->pipe_buffer.size; kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size); } #ifndef PIPE_NODIRECT if (cpipe->pipe_map.kva) { amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; kmem_free(kernel_map, cpipe->pipe_map.kva, cpipe->pipe_buffer.size + PAGE_SIZE); } #endif zfree(pipe_zone, cpipe); } } Index: head/sys/kern/sys_socket.c =================================================================== --- head/sys/kern/sys_socket.c (revision 41085) +++ head/sys/kern/sys_socket.c (revision 41086) @@ -1,175 +1,182 @@ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)sys_socket.c 8.1 (Berkeley) 6/10/93 - * $Id: sys_socket.c,v 1.17 1998/03/28 10:33:07 bde Exp $ + * $Id: sys_socket.c,v 1.18 1998/06/07 17:11:40 dfr Exp $ */ #include #include #include #include #include #include #include /* XXX */ #include #include #include +#include #include #include static int soo_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int soo_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int soo_close __P((struct file *fp, struct proc *p)); struct fileops socketops = { soo_read, soo_write, soo_ioctl, soo_poll, soo_close }; /* ARGSUSED */ static int soo_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct socket *so = (struct socket *)fp->f_data; return so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0); } /* ARGSUSED */ static int soo_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct socket *so = (struct socket *)fp->f_data; return so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0, uio->uio_procp); } int soo_ioctl(fp, cmd, data, p) struct file *fp; u_long cmd; register caddr_t data; struct proc *p; { register struct socket *so = (struct socket *)fp->f_data; switch (cmd) { case FIONBIO: if (*(int *)data) so->so_state |= SS_NBIO; else so->so_state &= ~SS_NBIO; return (0); case FIOASYNC: if (*(int *)data) { so->so_state |= SS_ASYNC; so->so_rcv.sb_flags |= SB_ASYNC; so->so_snd.sb_flags |= SB_ASYNC; } else { so->so_state &= ~SS_ASYNC; so->so_rcv.sb_flags &= ~SB_ASYNC; so->so_snd.sb_flags &= ~SB_ASYNC; } return (0); case FIONREAD: *(int *)data = so->so_rcv.sb_cc; return (0); - case SIOCSPGRP: - so->so_pgid = *(int *)data; + case FIOSETOWN: + return (fsetown(*(int *)data, &so->so_sigio)); + + case FIOGETOWN: + *(int *)data = fgetown(so->so_sigio); return (0); + case SIOCSPGRP: + return (fsetown(-(*(int *)data), &so->so_sigio)); + case SIOCGPGRP: - *(int *)data = so->so_pgid; + *(int *)data = -fgetown(so->so_sigio); return (0); case SIOCATMARK: *(int *)data = (so->so_state&SS_RCVATMARK) != 0; return (0); } /* * Interface/routing/protocol specific ioctls: * interface and routing 
ioctls should have a * different entry since a socket's unnecessary */ if (IOCGROUP(cmd) == 'i') return (ifioctl(so, cmd, data, p)); if (IOCGROUP(cmd) == 'r') return (rtioctl(cmd, data, p)); return ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, 0, p)); } int soo_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { struct socket *so = (struct socket *)fp->f_data; return so->so_proto->pr_usrreqs->pru_sopoll(so, events, cred, p); } int soo_stat(so, ub) register struct socket *so; register struct stat *ub; { bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFSOCK; return ((*so->so_proto->pr_usrreqs->pru_sense)(so, ub)); } /* ARGSUSED */ static int soo_close(fp, p) struct file *fp; struct proc *p; { int error = 0; if (fp->f_data) error = soclose((struct socket *)fp->f_data); fp->f_data = 0; return (error); } Index: head/sys/kern/tty.c =================================================================== --- head/sys/kern/tty.c (revision 41085) +++ head/sys/kern/tty.c (revision 41086) @@ -1,2401 +1,2422 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tty.c 8.8 (Berkeley) 1/21/94 - * $Id: tty.c,v 1.105 1998/07/11 10:41:15 bde Exp $ + * $Id: tty.c,v 1.106 1998/08/19 04:01:00 bde Exp $ */ /*- * TODO: * o Fix races for sending the start char in ttyflush(). * o Handle inter-byte timeout for "MIN > 0, TIME > 0" in ttyselect(). * With luck, there will be MIN chars before select() returns(). * o Handle CLOCAL consistently for ptys. 
Perhaps disallow setting it. * o Don't allow input in TS_ZOMBIE case. It would be visible through * FIONREAD. * o Do the new sio locking stuff here and use it to avoid special * case for EXTPROC? * o Lock PENDIN too? * o Move EXTPROC and/or PENDIN to t_state? * o Wrap most of ttioctl in spltty/splx. * o Implement TIOCNOTTY or remove it from . * o Send STOP if IXOFF is toggled off while TS_TBLOCK is set. * o Don't allow certain termios flags to affect disciplines other * than TTYDISC. Cancel their effects before switch disciplines * and ignore them if they are set while we are in another * discipline. * o Handle c_ispeed = 0 to c_ispeed = c_ospeed conversion here instead * of in drivers and fix drivers that write to tp->t_termios. * o Check for TS_CARR_ON being set while everything is closed and not * waiting for carrier. TS_CARR_ON isn't cleared if nothing is open, * so it would live until the next open even if carrier drops. * o Restore TS_WOPEN since it is useful in pstat. It must be cleared * only when _all_ openers leave open(). */ #include "snp.h" #include "opt_compat.h" #include "opt_uconsole.h" #include #include #include #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #include #endif #include #define TTYDEFCHARS #include #undef TTYDEFCHARS #include #include #include #include #include #include #include #include #include +#include #if NSNP > 0 #include #endif #include #include #include #include MALLOC_DEFINE(M_TTYS, "ttys", "tty data structures"); static int proc_compare __P((struct proc *p1, struct proc *p2)); static int ttnread __P((struct tty *tp)); static void ttyecho __P((int c, struct tty *tp)); static int ttyoutput __P((int c, register struct tty *tp)); static void ttypend __P((struct tty *tp)); static void ttyretype __P((struct tty *tp)); static void ttyrub __P((int c, struct tty *tp)); static void ttyrubo __P((struct tty *tp, int cnt)); static void ttyunblock __P((struct tty *tp)); static int ttywflush __P((struct tty *tp)); /* * Table with character classes and parity. The 8th bit indicates parity, * the 7th bit indicates the character is an alphameric or underscore (for * ALTWERASE), and the low 6 bits indicate delay type. If the low 6 bits * are 0 then the character needs no special processing on output; classes * other than 0 might be translated or (not currently) require delays. */ #define E 0x00 /* Even parity. */ #define O 0x80 /* Odd parity. */ #define PARITY(c) (char_type[c] & O) #define ALPHA 0x40 /* Alpha or underscore. */ #define ISALPHA(c) (char_type[(c) & TTY_CHARMASK] & ALPHA) #define CCLASSMASK 0x3f #define CCLASS(c) (char_type[c] & CCLASSMASK) #define BS BACKSPACE #define CC CONTROL #define CR RETURN #define NA ORDINARY | ALPHA #define NL NEWLINE #define NO ORDINARY #define TB TAB #define VT VTAB static u_char const char_type[] = { E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* nul - bel */ O|BS, E|TB, E|NL, O|CC, E|VT, O|CR, O|CC, E|CC, /* bs - si */ O|CC, E|CC, E|CC, O|CC, E|CC, O|CC, O|CC, E|CC, /* dle - etb */ E|CC, O|CC, O|CC, E|CC, O|CC, E|CC, E|CC, O|CC, /* can - us */ O|NO, E|NO, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* sp - ' */ E|NO, O|NO, O|NO, E|NO, O|NO, E|NO, E|NO, O|NO, /* ( - / */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* 0 - 7 */ O|NA, E|NA, E|NO, O|NO, E|NO, O|NO, O|NO, E|NO, /* 8 - ? 
*/ O|NO, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* @ - G */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* H - O */ E|NA, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* P - W */ O|NA, E|NA, E|NA, O|NO, E|NO, O|NO, O|NO, O|NA, /* X - _ */ E|NO, O|NA, O|NA, E|NA, O|NA, E|NA, E|NA, O|NA, /* ` - g */ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* h - o */ O|NA, E|NA, E|NA, O|NA, E|NA, O|NA, O|NA, E|NA, /* p - w */ E|NA, O|NA, O|NA, E|NO, O|NO, E|NO, E|NO, O|CC, /* x - del */ /* * Meta chars; should be settable per character set; * for now, treat them all as normal characters. */ NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, }; #undef BS #undef CC #undef CR #undef NA #undef NL #undef NO #undef TB #undef VT /* Macros to clear/set/test flags. */ #define SET(t, f) (t) |= (f) #define CLR(t, f) (t) &= ~(f) #define ISSET(t, f) ((t) & (f)) #undef MAX_INPUT /* XXX wrong in */ #define MAX_INPUT TTYHOG /* XXX limit is usually larger for !ICANON */ /* * Initial open of tty, or (re)entry to standard tty line discipline. */ int ttyopen(device, tp) dev_t device; register struct tty *tp; { int s; s = spltty(); tp->t_dev = device; if (!ISSET(tp->t_state, TS_ISOPEN)) { SET(tp->t_state, TS_ISOPEN); if (ISSET(tp->t_cflag, CLOCAL)) SET(tp->t_state, TS_CONNECTED); bzero(&tp->t_winsize, sizeof(tp->t_winsize)); } ttsetwater(tp); splx(s); return (0); } /* * Handle close() on a tty line: flush and set to initial state, * bumping generation number so that pending read/write calls * can detect recycling of the tty. * XXX our caller should have done `spltty(); l_close(); ttyclose();' * and l_close() should have flushed, but we repeat the spltty() and * the flush in case there are buggy callers. */ int ttyclose(tp) register struct tty *tp; { int s; + funsetown(tp->t_sigio); s = spltty(); if (constty == tp) constty = NULL; ttyflush(tp, FREAD | FWRITE); clist_free_cblocks(&tp->t_canq); clist_free_cblocks(&tp->t_outq); clist_free_cblocks(&tp->t_rawq); #if NSNP > 0 if (ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpdown((struct snoop *)tp->t_sc); #endif tp->t_gen++; tp->t_line = TTYDISC; tp->t_pgrp = NULL; tp->t_session = NULL; tp->t_state = 0; splx(s); return (0); } #define FLUSHQ(q) { \ if ((q)->c_cc) \ ndflush(q, (q)->c_cc); \ } /* Is 'c' a line delimiter ("break" character)? */ #define TTBREAKC(c, lflag) \ ((c) == '\n' || (((c) == cc[VEOF] || \ (c) == cc[VEOL] || ((c) == cc[VEOL2] && lflag & IEXTEN)) && \ (c) != _POSIX_VDISABLE)) /* * Process input of a single character received on a tty. */ int ttyinput(c, tp) register int c; register struct tty *tp; { register tcflag_t iflag, lflag; register cc_t *cc; int i, err; /* * If input is pending take it first. */ lflag = tp->t_lflag; if (ISSET(lflag, PENDIN)) ttypend(tp); /* * Gather stats. */ if (ISSET(lflag, ICANON)) { ++tk_cancc; ++tp->t_cancc; } else { ++tk_rawcc; ++tp->t_rawcc; } ++tk_nin; /* * Block further input iff: * current input > threshold AND input is available to user program * AND input flow control is enabled and not yet invoked. * The 3 is slop for PARMRK. 
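 * (The slop: with PARMRK set, a single error byte is expanded into the
 * three-byte sequence 0377, 0, c on the raw queue, so the high-water
 * check leaves room for one such expansion.)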
*/ iflag = tp->t_iflag; if (tp->t_rawq.c_cc + tp->t_canq.c_cc > tp->t_ihiwat - 3 && (!ISSET(lflag, ICANON) || tp->t_canq.c_cc != 0) && (ISSET(tp->t_cflag, CRTS_IFLOW) || ISSET(iflag, IXOFF)) && !ISSET(tp->t_state, TS_TBLOCK)) ttyblock(tp); /* Handle exceptional conditions (break, parity, framing). */ cc = tp->t_cc; err = (ISSET(c, TTY_ERRORMASK)); if (err) { CLR(c, TTY_ERRORMASK); if (ISSET(err, TTY_BI)) { if (ISSET(iflag, IGNBRK)) return (0); if (ISSET(iflag, BRKINT)) { ttyflush(tp, FREAD | FWRITE); pgsignal(tp->t_pgrp, SIGINT, 1); goto endcase; } if (ISSET(iflag, PARMRK)) goto parmrk; } else if ((ISSET(err, TTY_PE) && ISSET(iflag, INPCK)) || ISSET(err, TTY_FE)) { if (ISSET(iflag, IGNPAR)) return (0); else if (ISSET(iflag, PARMRK)) { parmrk: if (tp->t_rawq.c_cc + tp->t_canq.c_cc > MAX_INPUT - 3) goto input_overflow; (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); (void)putc(0 | TTY_QUOTE, &tp->t_rawq); (void)putc(c | TTY_QUOTE, &tp->t_rawq); goto endcase; } else c = 0; } } if (!ISSET(tp->t_state, TS_TYPEN) && ISSET(iflag, ISTRIP)) CLR(c, 0x80); if (!ISSET(lflag, EXTPROC)) { /* * Check for literal nexting very first */ if (ISSET(tp->t_state, TS_LNCH)) { SET(c, TTY_QUOTE); CLR(tp->t_state, TS_LNCH); } /* * Scan for special characters. This code * is really just a big case statement with * non-constant cases. The bottom of the * case statement is labeled ``endcase'', so goto * it after a case match, or similar. */ /* * Control chars which aren't controlled * by ICANON, ISIG, or IXON. */ if (ISSET(lflag, IEXTEN)) { if (CCEQ(cc[VLNEXT], c)) { if (ISSET(lflag, ECHO)) { if (ISSET(lflag, ECHOE)) { (void)ttyoutput('^', tp); (void)ttyoutput('\b', tp); } else ttyecho(c, tp); } SET(tp->t_state, TS_LNCH); goto endcase; } if (CCEQ(cc[VDISCARD], c)) { if (ISSET(lflag, FLUSHO)) CLR(tp->t_lflag, FLUSHO); else { ttyflush(tp, FWRITE); ttyecho(c, tp); if (tp->t_rawq.c_cc + tp->t_canq.c_cc) ttyretype(tp); SET(tp->t_lflag, FLUSHO); } goto startoutput; } } /* * Signals. */ if (ISSET(lflag, ISIG)) { if (CCEQ(cc[VINTR], c) || CCEQ(cc[VQUIT], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD | FWRITE); ttyecho(c, tp); pgsignal(tp->t_pgrp, CCEQ(cc[VINTR], c) ? SIGINT : SIGQUIT, 1); goto endcase; } if (CCEQ(cc[VSUSP], c)) { if (!ISSET(lflag, NOFLSH)) ttyflush(tp, FREAD); ttyecho(c, tp); pgsignal(tp->t_pgrp, SIGTSTP, 1); goto endcase; } } /* * Handle start/stop characters. */ if (ISSET(iflag, IXON)) { if (CCEQ(cc[VSTOP], c)) { if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, 0); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); #endif return (0); } if (!CCEQ(cc[VSTART], c)) return (0); /* * if VSTART == VSTOP then toggle */ goto endcase; } if (CCEQ(cc[VSTART], c)) goto restartoutput; } /* * IGNCR, ICRNL, & INLCR */ if (c == '\r') { if (ISSET(iflag, IGNCR)) return (0); else if (ISSET(iflag, ICRNL)) c = '\n'; } else if (c == '\n' && ISSET(iflag, INLCR)) c = '\r'; } if (!ISSET(tp->t_lflag, EXTPROC) && ISSET(lflag, ICANON)) { /* * From here on down canonical mode character * processing takes place. */ /* * erase (^H / ^?) 
*/ if (CCEQ(cc[VERASE], c)) { if (tp->t_rawq.c_cc) ttyrub(unputc(&tp->t_rawq), tp); goto endcase; } /* * kill (^U) */ if (CCEQ(cc[VKILL], c)) { if (ISSET(lflag, ECHOKE) && tp->t_rawq.c_cc == tp->t_rocount && !ISSET(lflag, ECHOPRT)) while (tp->t_rawq.c_cc) ttyrub(unputc(&tp->t_rawq), tp); else { ttyecho(c, tp); if (ISSET(lflag, ECHOK) || ISSET(lflag, ECHOKE)) ttyecho('\n', tp); FLUSHQ(&tp->t_rawq); tp->t_rocount = 0; } CLR(tp->t_state, TS_LOCAL); goto endcase; } /* * word erase (^W) */ if (CCEQ(cc[VWERASE], c) && ISSET(lflag, IEXTEN)) { int ctype; /* * erase whitespace */ while ((c = unputc(&tp->t_rawq)) == ' ' || c == '\t') ttyrub(c, tp); if (c == -1) goto endcase; /* * erase last char of word and remember the * next chars type (for ALTWERASE) */ ttyrub(c, tp); c = unputc(&tp->t_rawq); if (c == -1) goto endcase; if (c == ' ' || c == '\t') { (void)putc(c, &tp->t_rawq); goto endcase; } ctype = ISALPHA(c); /* * erase rest of word */ do { ttyrub(c, tp); c = unputc(&tp->t_rawq); if (c == -1) goto endcase; } while (c != ' ' && c != '\t' && (!ISSET(lflag, ALTWERASE) || ISALPHA(c) == ctype)); (void)putc(c, &tp->t_rawq); goto endcase; } /* * reprint line (^R) */ if (CCEQ(cc[VREPRINT], c) && ISSET(lflag, IEXTEN)) { ttyretype(tp); goto endcase; } /* * ^T - kernel info and generate SIGINFO */ if (CCEQ(cc[VSTATUS], c) && ISSET(lflag, IEXTEN)) { if (ISSET(lflag, ISIG)) pgsignal(tp->t_pgrp, SIGINFO, 1); if (!ISSET(lflag, NOKERNINFO)) ttyinfo(tp); goto endcase; } } /* * Check for input buffer overflow */ if (tp->t_rawq.c_cc + tp->t_canq.c_cc >= MAX_INPUT) { input_overflow: if (ISSET(iflag, IMAXBEL)) { if (tp->t_outq.c_cc < tp->t_ohiwat) (void)ttyoutput(CTRL('g'), tp); } goto endcase; } if ( c == 0377 && ISSET(iflag, PARMRK) && !ISSET(iflag, ISTRIP) && ISSET(iflag, IGNBRK|IGNPAR) != (IGNBRK|IGNPAR)) (void)putc(0377 | TTY_QUOTE, &tp->t_rawq); /* * Put data char in q for user and * wakeup on seeing a line delimiter. */ if (putc(c, &tp->t_rawq) >= 0) { if (!ISSET(lflag, ICANON)) { ttwakeup(tp); ttyecho(c, tp); goto endcase; } if (TTBREAKC(c, lflag)) { tp->t_rocount = 0; catq(&tp->t_rawq, &tp->t_canq); ttwakeup(tp); } else if (tp->t_rocount++ == 0) tp->t_rocol = tp->t_column; if (ISSET(tp->t_state, TS_ERASE)) { /* * end of prterase \.../ */ CLR(tp->t_state, TS_ERASE); (void)ttyoutput('/', tp); } i = tp->t_column; ttyecho(c, tp); if (CCEQ(cc[VEOF], c) && ISSET(lflag, ECHO)) { /* * Place the cursor over the '^' of the ^D. */ i = imin(2, tp->t_column - i); while (i > 0) { (void)ttyoutput('\b', tp); i--; } } } endcase: /* * IXANY means allow any character to restart output. */ if (ISSET(tp->t_state, TS_TTSTOP) && !ISSET(iflag, IXANY) && cc[VSTART] != cc[VSTOP]) return (0); restartoutput: CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); startoutput: return (ttstart(tp)); } /* * Output a single character on a tty, doing output processing * as needed (expanding tabs, newline processing, etc.). * Returns < 0 if succeeds, otherwise returns char to resend. * Must be recursive. */ static int ttyoutput(c, tp) register int c; register struct tty *tp; { register tcflag_t oflag; register int col, s; oflag = tp->t_oflag; if (!ISSET(oflag, OPOST)) { if (ISSET(tp->t_lflag, FLUSHO)) return (-1); if (putc(c, &tp->t_outq)) return (c); tk_nout++; tp->t_outcc++; return (-1); } /* * Do tab expansion if OXTABS is set. Special case if we external * processing, we don't do the tab expansion because we'll probably * get it wrong. If tab expansion needs to be done, let it happen * externally. 
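 * (Worked example of the expansion below: at t_column = 11,
 * c = 8 - (11 & 7) = 5, so five spaces are queued and the column
 * advances to the next tab stop, 16.)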
*/ CLR(c, ~TTY_CHARMASK); if (c == '\t' && ISSET(oflag, OXTABS) && !ISSET(tp->t_lflag, EXTPROC)) { c = 8 - (tp->t_column & 7); if (!ISSET(tp->t_lflag, FLUSHO)) { s = spltty(); /* Don't interrupt tabs. */ c -= b_to_q(" ", c, &tp->t_outq); tk_nout += c; tp->t_outcc += c; splx(s); } tp->t_column += c; return (c ? -1 : '\t'); } if (c == CEOT && ISSET(oflag, ONOEOT)) return (-1); /* * Newline translation: if ONLCR is set, * translate newline into "\r\n". */ if (c == '\n' && ISSET(tp->t_oflag, ONLCR)) { tk_nout++; tp->t_outcc++; if (putc('\r', &tp->t_outq)) return (c); } tk_nout++; tp->t_outcc++; if (!ISSET(tp->t_lflag, FLUSHO) && putc(c, &tp->t_outq)) return (c); col = tp->t_column; switch (CCLASS(c)) { case BACKSPACE: if (col > 0) --col; break; case CONTROL: break; case NEWLINE: case RETURN: col = 0; break; case ORDINARY: ++col; break; case TAB: col = (col + 8) & ~7; break; } tp->t_column = col; return (-1); } /* * Ioctls for all tty devices. Called after line-discipline specific ioctl * has been called to do discipline-specific functions and/or reject any * of these ioctl commands. */ /* ARGSUSED */ int ttioctl(tp, cmd, data, flag) register struct tty *tp; u_long cmd; int flag; void *data; { register struct proc *p; int s, error; p = curproc; /* XXX */ /* If the ioctl involves modification, hang if in the background. */ switch (cmd) { case TIOCFLUSH: case TIOCSETA: case TIOCSETD: case TIOCSETAF: case TIOCSETAW: #ifdef notdef case TIOCSPGRP: #endif case TIOCSTAT: case TIOCSTI: case TIOCSWINSZ: #if defined(COMPAT_43) || defined(COMPAT_SUNOS) case TIOCLBIC: case TIOCLBIS: case TIOCLSET: case TIOCSETC: case OTIOCSETD: case TIOCSETN: case TIOCSETP: case TIOCSLTC: #endif while (isbackground(p, tp) && (p->p_flag & P_PPWAIT) == 0 && (p->p_sigignore & sigmask(SIGTTOU)) == 0 && (p->p_sigmask & sigmask(SIGTTOU)) == 0) { if (p->p_pgrp->pg_jobc == 0) return (EIO); pgsignal(p->p_pgrp, SIGTTOU, 1); error = ttysleep(tp, &lbolt, TTOPRI | PCATCH, "ttybg1", 0); if (error) return (error); } break; } switch (cmd) { /* Process the ioctl. */ case FIOASYNC: /* set/clear async i/o */ s = spltty(); if (*(int *)data) SET(tp->t_state, TS_ASYNC); else CLR(tp->t_state, TS_ASYNC); splx(s); break; case FIONBIO: /* set/clear non-blocking i/o */ break; /* XXX: delete. 
*/ case FIONREAD: /* get # bytes to read */ s = spltty(); *(int *)data = ttnread(tp); splx(s); break; + + case FIOSETOWN: + /* + * Policy -- Don't allow FIOSETOWN on someone else's + * controlling tty + */ + if (tp->t_session != NULL && !isctty(p, tp)) + return (ENOTTY); + + error = fsetown(*(int *)data, &tp->t_sigio); + if (error) + return (error); + break; + case FIOGETOWN: + if (tp->t_session != NULL && !isctty(p, tp)) + return (ENOTTY); + *(int *)data = fgetown(tp->t_sigio); + break; + case TIOCEXCL: /* set exclusive use of tty */ s = spltty(); SET(tp->t_state, TS_XCLUDE); splx(s); break; case TIOCFLUSH: { /* flush buffers */ register int flags = *(int *)data; if (flags == 0) flags = FREAD | FWRITE; else flags &= FREAD | FWRITE; ttyflush(tp, flags); break; } case TIOCCONS: /* become virtual console */ if (*(int *)data) { if (constty && constty != tp && ISSET(constty->t_state, TS_CONNECTED)) return (EBUSY); #ifndef UCONSOLE if (error = suser(p->p_ucred, &p->p_acflag)) return (error); #endif constty = tp; } else if (tp == constty) constty = NULL; break; case TIOCDRAIN: /* wait till output drained */ error = ttywait(tp); if (error) return (error); break; case TIOCGETA: { /* get termios struct */ struct termios *t = (struct termios *)data; bcopy(&tp->t_termios, t, sizeof(struct termios)); break; } case TIOCGETD: /* get line discipline */ *(int *)data = tp->t_line; break; case TIOCGWINSZ: /* get window size */ *(struct winsize *)data = tp->t_winsize; break; case TIOCGPGRP: /* get pgrp of tty */ if (!isctty(p, tp)) return (ENOTTY); *(int *)data = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID; break; #ifdef TIOCHPCL case TIOCHPCL: /* hang up on last close */ s = spltty(); SET(tp->t_cflag, HUPCL); splx(s); break; #endif case TIOCNXCL: /* reset exclusive use of tty */ s = spltty(); CLR(tp->t_state, TS_XCLUDE); splx(s); break; case TIOCOUTQ: /* output queue size */ *(int *)data = tp->t_outq.c_cc; break; case TIOCSETA: /* set termios struct */ case TIOCSETAW: /* drain output, set */ case TIOCSETAF: { /* drn out, fls in, set */ register struct termios *t = (struct termios *)data; if (t->c_ispeed < 0 || t->c_ospeed < 0) return (EINVAL); s = spltty(); if (cmd == TIOCSETAW || cmd == TIOCSETAF) { error = ttywait(tp); if (error) { splx(s); return (error); } if (cmd == TIOCSETAF) ttyflush(tp, FREAD); } if (!ISSET(t->c_cflag, CIGNORE)) { /* * Set device hardware. */ if (tp->t_param && (error = (*tp->t_param)(tp, t))) { splx(s); return (error); } if (ISSET(t->c_cflag, CLOCAL) && !ISSET(tp->t_cflag, CLOCAL)) { /* * XXX disconnections would be too hard to * get rid of without this kludge. The only * way to get rid of controlling terminals * is to exit from the session leader. */ CLR(tp->t_state, TS_ZOMBIE); wakeup(TSA_CARR_ON(tp)); ttwakeup(tp); ttwwakeup(tp); } if ((ISSET(tp->t_state, TS_CARR_ON) || ISSET(t->c_cflag, CLOCAL)) && !ISSET(tp->t_state, TS_ZOMBIE)) SET(tp->t_state, TS_CONNECTED); else CLR(tp->t_state, TS_CONNECTED); tp->t_cflag = t->c_cflag; tp->t_ispeed = t->c_ispeed; tp->t_ospeed = t->c_ospeed; ttsetwater(tp); } if (ISSET(t->c_lflag, ICANON) != ISSET(tp->t_lflag, ICANON) && cmd != TIOCSETAF) { if (ISSET(t->c_lflag, ICANON)) SET(tp->t_lflag, PENDIN); else { /* * XXX we really shouldn't allow toggling * ICANON while we're in a non-termios line * discipline. Now we have to worry about * panicing for a null queue. 
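 * (Concretely: a non-termios discipline may run with t_rawq/t_canq
 * unreserved, and catq() or putc() on a queue with no reserved cblocks
 * is exactly the "putc to a clist with no reserved cblocks" failure
 * mentioned in ttyflush(); the c_cbreserved > 0 tests below guard
 * against that.)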
*/ if (tp->t_canq.c_cbreserved > 0 && tp->t_rawq.c_cbreserved > 0) { catq(&tp->t_rawq, &tp->t_canq); /* * XXX the queue limits may be * different, so the old queue * swapping method no longer works. */ catq(&tp->t_canq, &tp->t_rawq); } CLR(tp->t_lflag, PENDIN); } ttwakeup(tp); } tp->t_iflag = t->c_iflag; tp->t_oflag = t->c_oflag; /* * Make the EXTPROC bit read only. */ if (ISSET(tp->t_lflag, EXTPROC)) SET(t->c_lflag, EXTPROC); else CLR(t->c_lflag, EXTPROC); tp->t_lflag = t->c_lflag | ISSET(tp->t_lflag, PENDIN); if (t->c_cc[VMIN] != tp->t_cc[VMIN] || t->c_cc[VTIME] != tp->t_cc[VTIME]) ttwakeup(tp); bcopy(t->c_cc, tp->t_cc, sizeof(t->c_cc)); splx(s); break; } case TIOCSETD: { /* set line discipline */ register int t = *(int *)data; dev_t device = tp->t_dev; if ((u_int)t >= nlinesw) return (ENXIO); if (t != tp->t_line) { s = spltty(); (*linesw[tp->t_line].l_close)(tp, flag); error = (*linesw[t].l_open)(device, tp); if (error) { (void)(*linesw[tp->t_line].l_open)(device, tp); splx(s); return (error); } tp->t_line = t; splx(s); } break; } case TIOCSTART: /* start output, like ^Q */ s = spltty(); if (ISSET(tp->t_state, TS_TTSTOP) || ISSET(tp->t_lflag, FLUSHO)) { CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_TTSTOP); ttstart(tp); } splx(s); break; case TIOCSTI: /* simulate terminal input */ if (p->p_ucred->cr_uid && (flag & FREAD) == 0) return (EPERM); if (p->p_ucred->cr_uid && !isctty(p, tp)) return (EACCES); s = spltty(); (*linesw[tp->t_line].l_rint)(*(u_char *)data, tp); splx(s); break; case TIOCSTOP: /* stop output, like ^S */ s = spltty(); if (!ISSET(tp->t_state, TS_TTSTOP)) { SET(tp->t_state, TS_TTSTOP); #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, 0); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); #endif } splx(s); break; case TIOCSCTTY: /* become controlling tty */ /* Session ctty vnode pointer set in vnode layer. */ if (!SESS_LEADER(p) || ((p->p_session->s_ttyvp || tp->t_session) && (tp->t_session != p->p_session))) return (EPERM); tp->t_session = p->p_session; tp->t_pgrp = p->p_pgrp; p->p_session->s_ttyp = tp; p->p_flag |= P_CONTROLT; break; case TIOCSPGRP: { /* set pgrp of tty */ register struct pgrp *pgrp = pgfind(*(int *)data); if (!isctty(p, tp)) return (ENOTTY); else if (pgrp == NULL || pgrp->pg_session != p->p_session) return (EPERM); tp->t_pgrp = pgrp; break; } case TIOCSTAT: /* simulate control-T */ s = spltty(); ttyinfo(tp); splx(s); break; case TIOCSWINSZ: /* set window size */ if (bcmp((caddr_t)&tp->t_winsize, data, sizeof (struct winsize))) { tp->t_winsize = *(struct winsize *)data; pgsignal(tp->t_pgrp, SIGWINCH, 1); } break; case TIOCSDRAINWAIT: error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); tp->t_timeout = *(int *)data * hz; wakeup(TSA_OCOMPLETE(tp)); wakeup(TSA_OLOWAT(tp)); break; case TIOCGDRAINWAIT: *(int *)data = tp->t_timeout / hz; break; default: #if defined(COMPAT_43) || defined(COMPAT_SUNOS) return (ttcompat(tp, cmd, data, flag)); #else return (ENOIOCTL); #endif } return (0); } int ttypoll(tp, events, p) struct tty *tp; int events; struct proc *p; { int s; int revents = 0; if (tp == NULL) /* XXX used to return ENXIO, but that means true! 
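 * (A poll routine returns a bitmask of ready events rather than an
 * errno, so a positive error number like ENXIO would be misread as
 * "these events are ready"; a missing tty is therefore reported as
 * readable, writable and hung up instead.)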
*/ return ((events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)) | POLLHUP); s = spltty(); if (events & (POLLIN | POLLRDNORM)) if (ttnread(tp) > 0 || ISSET(tp->t_state, TS_ZOMBIE)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &tp->t_rsel); if (events & (POLLOUT | POLLWRNORM)) if ((tp->t_outq.c_cc <= tp->t_olowat && ISSET(tp->t_state, TS_CONNECTED)) || ISSET(tp->t_state, TS_ZOMBIE)) revents |= events & (POLLOUT | POLLWRNORM); else selrecord(p, &tp->t_wsel); splx(s); return (revents); } /* * This is a wrapper for compatibility with the select vector used by * cdevsw. It relies on a proper xxxdevtotty routine. */ int ttpoll(dev, events, p) dev_t dev; int events; struct proc *p; { return ttypoll((*cdevsw[major(dev)]->d_devtotty)(dev), events, p); } /* * Must be called at spltty(). */ static int ttnread(tp) struct tty *tp; { int nread; if (ISSET(tp->t_lflag, PENDIN)) ttypend(tp); nread = tp->t_canq.c_cc; if (!ISSET(tp->t_lflag, ICANON)) { nread += tp->t_rawq.c_cc; if (nread < tp->t_cc[VMIN] && tp->t_cc[VTIME] == 0) nread = 0; } return (nread); } /* * Wait for output to drain. */ int ttywait(tp) register struct tty *tp; { int error, s; error = 0; s = spltty(); while ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED) && tp->t_oproc) { (*tp->t_oproc)(tp); if ((tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY)) && ISSET(tp->t_state, TS_CONNECTED)) { SET(tp->t_state, TS_SO_OCOMPLETE); error = ttysleep(tp, TSA_OCOMPLETE(tp), TTOPRI | PCATCH, "ttywai", tp->t_timeout); if (error) { if (error == EWOULDBLOCK) error = EIO; break; } } else break; } if (!error && (tp->t_outq.c_cc || ISSET(tp->t_state, TS_BUSY))) error = EIO; splx(s); return (error); } /* * Flush if successfully wait. */ static int ttywflush(tp) struct tty *tp; { int error; if ((error = ttywait(tp)) == 0) ttyflush(tp, FREAD); return (error); } /* * Flush tty read and/or write queues, notifying anyone waiting. */ void ttyflush(tp, rw) register struct tty *tp; int rw; { register int s; s = spltty(); #if 0 again: #endif if (rw & FWRITE) { FLUSHQ(&tp->t_outq); CLR(tp->t_state, TS_TTSTOP); } #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, rw); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, rw); #endif if (rw & FREAD) { FLUSHQ(&tp->t_canq); FLUSHQ(&tp->t_rawq); CLR(tp->t_lflag, PENDIN); tp->t_rocount = 0; tp->t_rocol = 0; CLR(tp->t_state, TS_LOCAL); ttwakeup(tp); if (ISSET(tp->t_state, TS_TBLOCK)) { if (rw & FWRITE) FLUSHQ(&tp->t_outq); ttyunblock(tp); /* * Don't let leave any state that might clobber the * next line discipline (although we should do more * to send the START char). Not clearing the state * may have caused the "putc to a clist with no * reserved cblocks" panic/printf. */ CLR(tp->t_state, TS_TBLOCK); #if 0 /* forget it, sleeping isn't always safe and we don't know when it is */ if (ISSET(tp->t_iflag, IXOFF)) { /* * XXX wait a bit in the hope that the stop * character (if any) will go out. Waiting * isn't good since it allows races. This * will be fixed when the stop character is * put in a special queue. Don't bother with * the checks in ttywait() since the timeout * will save us. */ SET(tp->t_state, TS_SO_OCOMPLETE); ttysleep(tp, TSA_OCOMPLETE(tp), TTOPRI, "ttyfls", hz / 10); /* * Don't try sending the stop character again. */ CLR(tp->t_state, TS_TBLOCK); goto again; } #endif } } if (rw & FWRITE) { FLUSHQ(&tp->t_outq); ttwwakeup(tp); } splx(s); } /* * Copy in the default termios characters. 
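 * As an illustrative fragment (hypothetical driver open routine, not
 * part of this file), a driver typically seeds the termios defaults on
 * first open before handing the tty to the line discipline:
 *
 *	tp->t_oflag = TTYDEF_OFLAG;
 *	tp->t_iflag = TTYDEF_IFLAG;
 *	tp->t_lflag = TTYDEF_LFLAG;
 *	tp->t_cflag = TTYDEF_CFLAG;
 *	tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
 *	termioschars(&tp->t_termios);
 *	ttsetwater(tp);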
*/ void termioschars(t) struct termios *t; { bcopy(ttydefchars, t->c_cc, sizeof t->c_cc); } /* * Old interface. */ void ttychars(tp) struct tty *tp; { termioschars(&tp->t_termios); } /* * Handle input high water. Send stop character for the IXOFF case. Turn * on our input flow control bit and propagate the changes to the driver. * XXX the stop character should be put in a special high priority queue. */ void ttyblock(tp) struct tty *tp; { SET(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTOP] != _POSIX_VDISABLE && putc(tp->t_cc[VSTOP], &tp->t_outq) != 0) CLR(tp->t_state, TS_TBLOCK); /* try again later */ ttstart(tp); } /* * Handle input low water. Send start character for the IXOFF case. Turn * off our input flow control bit and propagate the changes to the driver. * XXX the start character should be put in a special high priority queue. */ static void ttyunblock(tp) struct tty *tp; { CLR(tp->t_state, TS_TBLOCK); if (ISSET(tp->t_iflag, IXOFF) && tp->t_cc[VSTART] != _POSIX_VDISABLE && putc(tp->t_cc[VSTART], &tp->t_outq) != 0) SET(tp->t_state, TS_TBLOCK); /* try again later */ ttstart(tp); } #ifdef notyet /* Not used by any current (i386) drivers. */ /* * Restart after an inter-char delay. */ void ttrstrt(tp_arg) void *tp_arg; { struct tty *tp; int s; #ifdef DIAGNOSTIC if (tp_arg == NULL) panic("ttrstrt"); #endif tp = tp_arg; s = spltty(); CLR(tp->t_state, TS_TIMEOUT); ttstart(tp); splx(s); } #endif int ttstart(tp) struct tty *tp; { if (tp->t_oproc != NULL) /* XXX: Kludge for pty. */ (*tp->t_oproc)(tp); return (0); } /* * "close" a line discipline */ int ttylclose(tp, flag) struct tty *tp; int flag; { if (flag & FNONBLOCK || ttywflush(tp)) ttyflush(tp, FREAD | FWRITE); return (0); } /* * Handle modem control transition on a tty. * Flag indicates new state of carrier. * Returns 0 if the line should be turned off, otherwise 1. */ int ttymodem(tp, flag) register struct tty *tp; int flag; { if (ISSET(tp->t_state, TS_CARR_ON) && ISSET(tp->t_cflag, MDMBUF)) { /* * MDMBUF: do flow control according to carrier flag * XXX TS_CAR_OFLOW doesn't do anything yet. TS_TTSTOP * works if IXON and IXANY are clear. */ if (flag) { CLR(tp->t_state, TS_CAR_OFLOW); CLR(tp->t_state, TS_TTSTOP); ttstart(tp); } else if (!ISSET(tp->t_state, TS_CAR_OFLOW)) { SET(tp->t_state, TS_CAR_OFLOW); SET(tp->t_state, TS_TTSTOP); #ifdef sun4c /* XXX */ (*tp->t_stop)(tp, 0); #else (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); #endif } } else if (flag == 0) { /* * Lost carrier. */ CLR(tp->t_state, TS_CARR_ON); if (ISSET(tp->t_state, TS_ISOPEN) && !ISSET(tp->t_cflag, CLOCAL)) { SET(tp->t_state, TS_ZOMBIE); CLR(tp->t_state, TS_CONNECTED); if (tp->t_session && tp->t_session->s_leader) psignal(tp->t_session->s_leader, SIGHUP); ttyflush(tp, FREAD | FWRITE); return (0); } } else { /* * Carrier now on. */ SET(tp->t_state, TS_CARR_ON); if (!ISSET(tp->t_state, TS_ZOMBIE)) SET(tp->t_state, TS_CONNECTED); wakeup(TSA_CARR_ON(tp)); ttwakeup(tp); ttwwakeup(tp); } return (1); } /* * Reinput pending characters after state switch * call at spltty(). */ static void ttypend(tp) register struct tty *tp; { struct clist tq; register int c; CLR(tp->t_lflag, PENDIN); SET(tp->t_state, TS_TYPEN); /* * XXX this assumes too much about clist internals. It may even * fail if the cblock slush pool is empty. We can't allocate more * cblocks here because we are called from an interrupt handler * and clist_alloc_cblocks() can wait. 
*/ tq = tp->t_rawq; bzero(&tp->t_rawq, sizeof tp->t_rawq); tp->t_rawq.c_cbmax = tq.c_cbmax; tp->t_rawq.c_cbreserved = tq.c_cbreserved; while ((c = getc(&tq)) >= 0) ttyinput(c, tp); CLR(tp->t_state, TS_TYPEN); } /* * Process a read call on a tty device. */ int ttread(tp, uio, flag) register struct tty *tp; struct uio *uio; int flag; { register struct clist *qp; register int c; register tcflag_t lflag; register cc_t *cc = tp->t_cc; register struct proc *p = curproc; int s, first, error = 0; int has_stime = 0, last_cc = 0; long slp = 0; /* XXX this should be renamed `timo'. */ loop: s = spltty(); lflag = tp->t_lflag; /* * take pending input first */ if (ISSET(lflag, PENDIN)) { ttypend(tp); splx(s); /* reduce latency */ s = spltty(); lflag = tp->t_lflag; /* XXX ttypend() clobbers it */ } /* * Hang process if it's in the background. */ if (isbackground(p, tp)) { splx(s); if ((p->p_sigignore & sigmask(SIGTTIN)) || (p->p_sigmask & sigmask(SIGTTIN)) || p->p_flag & P_PPWAIT || p->p_pgrp->pg_jobc == 0) return (EIO); pgsignal(p->p_pgrp, SIGTTIN, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg2", 0); if (error) return (error); goto loop; } if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); return (0); /* EOF */ } /* * If canonical, use the canonical queue, * else use the raw queue. * * (should get rid of clists...) */ qp = ISSET(lflag, ICANON) ? &tp->t_canq : &tp->t_rawq; if (flag & IO_NDELAY) { if (qp->c_cc > 0) goto read; if (!ISSET(lflag, ICANON) && cc[VMIN] == 0) { splx(s); return (0); } splx(s); return (EWOULDBLOCK); } if (!ISSET(lflag, ICANON)) { int m = cc[VMIN]; long t = cc[VTIME]; struct timeval stime, timecopy; /* * Check each of the four combinations. * (m > 0 && t == 0) is the normal read case. * It should be fairly efficient, so we check that and its * companion case (m == 0 && t == 0) first. * For the other two cases, we compute the target sleep time * into slp. */ if (t == 0) { if (qp->c_cc < m) goto sleep; if (qp->c_cc > 0) goto read; /* m, t and qp->c_cc are all 0. 0 is enough input. */ splx(s); return (0); } t *= 100000; /* time in us */ #define diff(t1, t2) (((t1).tv_sec - (t2).tv_sec) * 1000000 + \ ((t1).tv_usec - (t2).tv_usec)) if (m > 0) { if (qp->c_cc <= 0) goto sleep; if (qp->c_cc >= m) goto read; getmicrotime(&timecopy); if (!has_stime) { /* first character, start timer */ has_stime = 1; stime = timecopy; slp = t; } else if (qp->c_cc > last_cc) { /* got a character, restart timer */ stime = timecopy; slp = t; } else { /* nothing, check expiration */ slp = t - diff(timecopy, stime); if (slp <= 0) goto read; } last_cc = qp->c_cc; } else { /* m == 0 */ if (qp->c_cc > 0) goto read; getmicrotime(&timecopy); if (!has_stime) { has_stime = 1; stime = timecopy; slp = t; } else { slp = t - diff(timecopy, stime); if (slp <= 0) { /* Timed out, but 0 is enough input. */ splx(s); return (0); } } } #undef diff /* * Rounding down may make us wake up just short * of the target, so we round up. * The formula is ceiling(slp * hz/1000000). * 32-bit arithmetic is enough for hz < 169. * XXX see tvtohz() for how to avoid overflow if hz * is large (divide by `tick' and/or arrange to * use tvtohz() if hz is large). */ slp = (long) (((u_long)slp * hz) + 999999) / 1000000; goto sleep; } if (qp->c_cc <= 0) { sleep: /* * There is no input, or not enough input and we can block. */ error = ttysleep(tp, TSA_HUP_OR_INPUT(tp), TTIPRI | PCATCH, ISSET(tp->t_state, TS_CONNECTED) ? 
"ttyin" : "ttyhup", (int)slp); splx(s); if (error == EWOULDBLOCK) error = 0; else if (error) return (error); /* * XXX what happens if another process eats some input * while we are asleep (not just here)? It would be * safest to detect changes and reset our state variables * (has_stime and last_cc). */ slp = 0; goto loop; } read: splx(s); /* * Input present, check for input mapping and processing. */ first = 1; if (ISSET(lflag, ICANON | ISIG)) goto slowcase; for (;;) { char ibuf[IBUFSIZ]; int icc; icc = imin(uio->uio_resid, IBUFSIZ); icc = q_to_b(qp, ibuf, icc); if (icc <= 0) { if (first) goto loop; break; } error = uiomove(ibuf, icc, uio); /* * XXX if there was an error then we should ungetc() the * unmoved chars and reduce icc here. */ #if NSNP > 0 if (ISSET(tp->t_lflag, ECHO) && ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpin((struct snoop *)tp->t_sc, ibuf, icc); #endif if (error) break; if (uio->uio_resid == 0) break; first = 0; } goto out; slowcase: for (;;) { c = getc(qp); if (c < 0) { if (first) goto loop; break; } /* * delayed suspend (^Y) */ if (CCEQ(cc[VDSUSP], c) && ISSET(lflag, IEXTEN | ISIG) == (IEXTEN | ISIG)) { pgsignal(tp->t_pgrp, SIGTSTP, 1); if (first) { error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg3", 0); if (error) break; goto loop; } break; } /* * Interpret EOF only in canonical mode. */ if (CCEQ(cc[VEOF], c) && ISSET(lflag, ICANON)) break; /* * Give user character. */ error = ureadc(c, uio); if (error) /* XXX should ungetc(c, qp). */ break; #if NSNP > 0 /* * Only snoop directly on input in echo mode. Non-echoed * input will be snooped later iff the application echoes it. */ if (ISSET(tp->t_lflag, ECHO) && ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpinc((struct snoop *)tp->t_sc, (char)c); #endif if (uio->uio_resid == 0) break; /* * In canonical mode check for a "break character" * marking the end of a "line of input". */ if (ISSET(lflag, ICANON) && TTBREAKC(c, lflag)) break; first = 0; } out: /* * Look to unblock input now that (presumably) * the input queue has gone down. */ s = spltty(); if (ISSET(tp->t_state, TS_TBLOCK) && tp->t_rawq.c_cc + tp->t_canq.c_cc <= tp->t_ilowat) ttyunblock(tp); splx(s); return (error); } /* * Check the output queue on tp for space for a kernel message (from uprintf * or tprintf). Allow some space over the normal hiwater mark so we don't * lose messages due to normal flow control, but don't let the tty run amok. * Sleeps here are not interruptible, but we return prematurely if new signals * arrive. */ int ttycheckoutq(tp, wait) register struct tty *tp; int wait; { int hiwat, s, oldsig; hiwat = tp->t_ohiwat; s = spltty(); oldsig = wait ? curproc->p_siglist : 0; if (tp->t_outq.c_cc > hiwat + OBUFSIZ + 100) while (tp->t_outq.c_cc > hiwat) { ttstart(tp); if (tp->t_outq.c_cc <= hiwat) break; if (wait == 0 || curproc->p_siglist != oldsig) { splx(s); return (0); } SET(tp->t_state, TS_SO_OLOWAT); tsleep(TSA_OLOWAT(tp), PZERO - 1, "ttoutq", hz); } splx(s); return (1); } /* * Process a write call on a tty device. 
*/ int ttwrite(tp, uio, flag) register struct tty *tp; register struct uio *uio; int flag; { register char *cp = NULL; register int cc, ce; register struct proc *p; int i, hiwat, cnt, error, s; char obuf[OBUFSIZ]; hiwat = tp->t_ohiwat; cnt = uio->uio_resid; error = 0; cc = 0; loop: s = spltty(); if (ISSET(tp->t_state, TS_ZOMBIE)) { splx(s); if (uio->uio_resid == cnt) error = EIO; goto out; } if (!ISSET(tp->t_state, TS_CONNECTED)) { if (flag & IO_NDELAY) { splx(s); error = EWOULDBLOCK; goto out; } error = ttysleep(tp, TSA_CARR_ON(tp), TTIPRI | PCATCH, "ttydcd", 0); splx(s); if (error) goto out; goto loop; } splx(s); /* * Hang the process if it's in the background. */ p = curproc; if (isbackground(p, tp) && ISSET(tp->t_lflag, TOSTOP) && (p->p_flag & P_PPWAIT) == 0 && (p->p_sigignore & sigmask(SIGTTOU)) == 0 && (p->p_sigmask & sigmask(SIGTTOU)) == 0) { if (p->p_pgrp->pg_jobc == 0) { error = EIO; goto out; } pgsignal(p->p_pgrp, SIGTTOU, 1); error = ttysleep(tp, &lbolt, TTIPRI | PCATCH, "ttybg4", 0); if (error) goto out; goto loop; } /* * Process the user's data in at most OBUFSIZ chunks. Perform any * output translation. Keep track of high water mark, sleep on * overflow awaiting device aid in acquiring new space. */ while (uio->uio_resid > 0 || cc > 0) { if (ISSET(tp->t_lflag, FLUSHO)) { uio->uio_resid = 0; return (0); } if (tp->t_outq.c_cc > hiwat) goto ovhiwat; /* * Grab a hunk of data from the user, unless we have some * leftover from last time. */ if (cc == 0) { cc = imin(uio->uio_resid, OBUFSIZ); cp = obuf; error = uiomove(cp, cc, uio); if (error) { cc = 0; break; } #if NSNP > 0 if (ISSET(tp->t_state, TS_SNOOP) && tp->t_sc != NULL) snpin((struct snoop *)tp->t_sc, cp, cc); #endif } /* * If nothing fancy need be done, grab those characters we * can handle without any of ttyoutput's processing and * just transfer them to the output q. For those chars * which require special processing (as indicated by the * bits in char_type), call ttyoutput. After processing * a hunk of data, look for FLUSHO so ^O's will take effect * immediately. */ while (cc > 0) { if (!ISSET(tp->t_oflag, OPOST)) ce = cc; else { ce = cc - scanc((u_int)cc, (u_char *)cp, char_type, CCLASSMASK); /* * If ce is zero, then we're processing * a special character through ttyoutput. */ if (ce == 0) { tp->t_rocount = 0; if (ttyoutput(*cp, tp) >= 0) { /* No Clists, wait a bit. */ ttstart(tp); if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; } error = ttysleep(tp, &lbolt, TTOPRI|PCATCH, "ttybf1", 0); if (error) goto out; goto loop; } cp++; cc--; if (ISSET(tp->t_lflag, FLUSHO) || tp->t_outq.c_cc > hiwat) goto ovhiwat; continue; } } /* * A bunch of normal characters have been found. * Transfer them en masse to the output queue and * continue processing at the top of the loop. * If there are any further characters in this * <= OBUFSIZ chunk, the first should be a character * requiring special handling by ttyoutput. */ tp->t_rocount = 0; i = b_to_q(cp, ce, &tp->t_outq); ce -= i; tp->t_column += ce; cp += ce, cc -= ce, tk_nout += ce; tp->t_outcc += ce; if (i > 0) { /* No Clists, wait a bit. 
*/ ttstart(tp); if (flag & IO_NDELAY) { error = EWOULDBLOCK; goto out; } error = ttysleep(tp, &lbolt, TTOPRI | PCATCH, "ttybf2", 0); if (error) goto out; goto loop; } if (ISSET(tp->t_lflag, FLUSHO) || tp->t_outq.c_cc > hiwat) break; } ttstart(tp); } out: /* * If cc is nonzero, we leave the uio structure inconsistent, as the * offset and iov pointers have moved forward, but it doesn't matter * (the call will either return short or restart with a new uio). */ uio->uio_resid += cc; return (error); ovhiwat: ttstart(tp); s = spltty(); /* * This can only occur if FLUSHO is set in t_lflag, * or if ttstart/oproc is synchronous (or very fast). */ if (tp->t_outq.c_cc <= hiwat) { splx(s); goto loop; } if (flag & IO_NDELAY) { splx(s); uio->uio_resid += cc; return (uio->uio_resid == cnt ? EWOULDBLOCK : 0); } SET(tp->t_state, TS_SO_OLOWAT); error = ttysleep(tp, TSA_OLOWAT(tp), TTOPRI | PCATCH, "ttywri", tp->t_timeout); splx(s); if (error == EWOULDBLOCK) error = EIO; if (error) goto out; goto loop; } /* * Rubout one character from the rawq of tp * as cleanly as possible. */ static void ttyrub(c, tp) register int c; register struct tty *tp; { register char *cp; register int savecol; int tabc, s; if (!ISSET(tp->t_lflag, ECHO) || ISSET(tp->t_lflag, EXTPROC)) return; CLR(tp->t_lflag, FLUSHO); if (ISSET(tp->t_lflag, ECHOE)) { if (tp->t_rocount == 0) { /* * Screwed by ttwrite; retype */ ttyretype(tp); return; } if (c == ('\t' | TTY_QUOTE) || c == ('\n' | TTY_QUOTE)) ttyrubo(tp, 2); else { CLR(c, ~TTY_CHARMASK); switch (CCLASS(c)) { case ORDINARY: ttyrubo(tp, 1); break; case BACKSPACE: case CONTROL: case NEWLINE: case RETURN: case VTAB: if (ISSET(tp->t_lflag, ECHOCTL)) ttyrubo(tp, 2); break; case TAB: if (tp->t_rocount < tp->t_rawq.c_cc) { ttyretype(tp); return; } s = spltty(); savecol = tp->t_column; SET(tp->t_state, TS_CNTTB); SET(tp->t_lflag, FLUSHO); tp->t_column = tp->t_rocol; cp = tp->t_rawq.c_cf; if (cp) tabc = *cp; /* XXX FIX NEXTC */ for (; cp; cp = nextc(&tp->t_rawq, cp, &tabc)) ttyecho(tabc, tp); CLR(tp->t_lflag, FLUSHO); CLR(tp->t_state, TS_CNTTB); splx(s); /* savecol will now be length of the tab. */ savecol -= tp->t_column; tp->t_column += savecol; if (savecol > 8) savecol = 8; /* overflow screw */ while (--savecol >= 0) (void)ttyoutput('\b', tp); break; default: /* XXX */ #define PANICSTR "ttyrub: would panic c = %d, val = %d\n" (void)printf(PANICSTR, c, CCLASS(c)); #ifdef notdef panic(PANICSTR, c, CCLASS(c)); #endif } } } else if (ISSET(tp->t_lflag, ECHOPRT)) { if (!ISSET(tp->t_state, TS_ERASE)) { SET(tp->t_state, TS_ERASE); (void)ttyoutput('\\', tp); } ttyecho(c, tp); } else ttyecho(tp->t_cc[VERASE], tp); --tp->t_rocount; } /* * Back over cnt characters, erasing them. */ static void ttyrubo(tp, cnt) register struct tty *tp; int cnt; { while (cnt-- > 0) { (void)ttyoutput('\b', tp); (void)ttyoutput(' ', tp); (void)ttyoutput('\b', tp); } } /* * ttyretype -- * Reprint the rawq line. Note, it is assumed that c_cc has already * been checked. */ static void ttyretype(tp) register struct tty *tp; { register char *cp; int s, c; /* Echo the reprint character. */ if (tp->t_cc[VREPRINT] != _POSIX_VDISABLE) ttyecho(tp->t_cc[VREPRINT], tp); (void)ttyoutput('\n', tp); /* * XXX * FIX: NEXTC IS BROKEN - DOESN'T CHECK QUOTE * BIT OF FIRST CHAR. */ s = spltty(); for (cp = tp->t_canq.c_cf, c = (cp != NULL ? *cp : 0); cp != NULL; cp = nextc(&tp->t_canq, cp, &c)) ttyecho(c, tp); for (cp = tp->t_rawq.c_cf, c = (cp != NULL ? 
*cp : 0); cp != NULL; cp = nextc(&tp->t_rawq, cp, &c)) ttyecho(c, tp); CLR(tp->t_state, TS_ERASE); splx(s); tp->t_rocount = tp->t_rawq.c_cc; tp->t_rocol = 0; } /* * Echo a typed character to the terminal. */ static void ttyecho(c, tp) register int c; register struct tty *tp; { if (!ISSET(tp->t_state, TS_CNTTB)) CLR(tp->t_lflag, FLUSHO); if ((!ISSET(tp->t_lflag, ECHO) && (c != '\n' || !ISSET(tp->t_lflag, ECHONL))) || ISSET(tp->t_lflag, EXTPROC)) return; if (ISSET(tp->t_lflag, ECHOCTL) && ((ISSET(c, TTY_CHARMASK) <= 037 && c != '\t' && c != '\n') || ISSET(c, TTY_CHARMASK) == 0177)) { (void)ttyoutput('^', tp); CLR(c, ~TTY_CHARMASK); if (c == 0177) c = '?'; else c += 'A' - 1; } (void)ttyoutput(c, tp); } /* * Wake up any readers on a tty. */ void ttwakeup(tp) register struct tty *tp; { if (tp->t_rsel.si_pid != 0) selwakeup(&tp->t_rsel); - if (ISSET(tp->t_state, TS_ASYNC)) - pgsignal(tp->t_pgrp, SIGIO, 1); + if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL) + pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL)); wakeup(TSA_HUP_OR_INPUT(tp)); } /* * Wake up any writers on a tty. */ void ttwwakeup(tp) register struct tty *tp; { if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat) selwakeup(&tp->t_wsel); if (ISSET(tp->t_state, TS_BUSY | TS_SO_OCOMPLETE) == TS_SO_OCOMPLETE && tp->t_outq.c_cc == 0) { CLR(tp->t_state, TS_SO_OCOMPLETE); wakeup(TSA_OCOMPLETE(tp)); } if (ISSET(tp->t_state, TS_SO_OLOWAT) && tp->t_outq.c_cc <= tp->t_olowat) { CLR(tp->t_state, TS_SO_OLOWAT); wakeup(TSA_OLOWAT(tp)); } } /* * Look up a code for a specified speed in a conversion table; * used by drivers to map software speed values to hardware parameters. */ int ttspeedtab(speed, table) int speed; register struct speedtab *table; { for ( ; table->sp_speed != -1; table++) if (table->sp_speed == speed) return (table->sp_code); return (-1); } /* * Set input and output watermarks and buffer sizes. For input, the * high watermark is about one second's worth of input above empty, the * low watermark is slightly below high water, and the buffer size is a * driver-dependent amount above high water. For output, the watermarks * are near the ends of the buffer, with about 1 second's worth of output * between them. All this only applies to the standard line discipline. */ void ttsetwater(tp) struct tty *tp; { register int cps, ttmaxhiwat, x; /* Input. */ clist_alloc_cblocks(&tp->t_canq, TTYHOG, 512); switch (tp->t_ispeedwat) { case (speed_t)-1: cps = tp->t_ispeed / 10; break; case 0: /* * This case is for old drivers that don't know about * t_ispeedwat. Arrange for them to get the old buffer * sizes and watermarks. */ cps = TTYHOG - 2 * 256; tp->t_ififosize = 2 * 256; break; default: cps = tp->t_ispeedwat / 10; break; } tp->t_ihiwat = cps; tp->t_ilowat = 7 * cps / 8; x = cps + tp->t_ififosize; clist_alloc_cblocks(&tp->t_rawq, x, x); /* Output. */ switch (tp->t_ospeedwat) { case (speed_t)-1: cps = tp->t_ospeed / 10; ttmaxhiwat = 2 * TTMAXHIWAT; break; case 0: cps = tp->t_ospeed / 10; ttmaxhiwat = TTMAXHIWAT; break; default: cps = tp->t_ospeedwat / 10; ttmaxhiwat = 8 * TTMAXHIWAT; break; } #define CLAMP(x, h, l) ((x) > h ? h : ((x) < l) ? l : (x)) tp->t_olowat = x = CLAMP(cps / 2, TTMAXLOWAT, TTMINLOWAT); x += cps; x = CLAMP(x, ttmaxhiwat, TTMINHIWAT); /* XXX clamps are too magic */ tp->t_ohiwat = roundup(x, CBSIZE); /* XXX for compat */ x = imax(tp->t_ohiwat, TTMAXHIWAT); /* XXX for compat/safety */ x += OBUFSIZ + 100; clist_alloc_cblocks(&tp->t_outq, x, x); #undef CLAMP } /* * Report on state of foreground process group.
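* This runs when the status character (VSTATUS, ^T by default) is typed; * the single line of output looks roughly like this illustrative sample: * load: 0.15 cmd: cc 317 [running] 1.23u 0.45s 3% 1024k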
*/ void ttyinfo(tp) register struct tty *tp; { register struct proc *p, *pick; struct timeval utime, stime; int tmp; if (ttycheckoutq(tp,0) == 0) return; /* Print load average. */ tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT; ttyprintf(tp, "load: %d.%02d ", tmp / 100, tmp % 100); if (tp->t_session == NULL) ttyprintf(tp, "not a controlling terminal\n"); else if (tp->t_pgrp == NULL) ttyprintf(tp, "no foreground process group\n"); else if ((p = tp->t_pgrp->pg_members.lh_first) == 0) ttyprintf(tp, "empty foreground process group\n"); else { /* Pick interesting process. */ for (pick = NULL; p != 0; p = p->p_pglist.le_next) if (proc_compare(pick, p)) pick = p; ttyprintf(tp, " cmd: %s %d [%s] ", pick->p_comm, pick->p_pid, pick->p_stat == SRUN ? "running" : pick->p_wmesg ? pick->p_wmesg : "iowait"); calcru(pick, &utime, &stime, NULL); /* Print user time. */ ttyprintf(tp, "%ld.%02ldu ", utime.tv_sec, utime.tv_usec / 10000); /* Print system time. */ ttyprintf(tp, "%ld.%02lds ", stime.tv_sec, stime.tv_usec / 10000); #define pgtok(a) (((a) * PAGE_SIZE) / 1024) /* Print percentage cpu, resident set size. */ tmp = (pick->p_pctcpu * 10000 + FSCALE / 2) >> FSHIFT; ttyprintf(tp, "%d%% %ldk\n", tmp / 100, pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 : #ifdef pmap_resident_count (long)pgtok(pmap_resident_count(&pick->p_vmspace->vm_pmap)) #else (long)pgtok(pick->p_vmspace->vm_rssize) #endif ); } tp->t_rocount = 0; /* so pending input will be retyped if BS */ } /* * Returns 1 if p2 is "better" than p1 * * The algorithm for picking the "interesting" process is thus: * * 1) Only foreground processes are eligible - implied. * 2) Runnable processes are favored over anything else. The runner * with the highest cpu utilization is picked (p_estcpu). Ties are * broken by picking the highest pid. * 3) The sleeper with the shortest sleep time is next. With ties, * we pick out just "short-term" sleepers (P_SINTR == 0). * 4) Further ties are broken by picking the highest pid. */ #define ISRUN(p) (((p)->p_stat == SRUN) || ((p)->p_stat == SIDL)) #define TESTAB(a, b) ((a)<<1 | (b)) #define ONLYA 2 #define ONLYB 1 #define BOTH 3 static int proc_compare(p1, p2) register struct proc *p1, *p2; { if (p1 == NULL) return (1); /* * see if at least one of them is runnable */ switch (TESTAB(ISRUN(p1), ISRUN(p2))) { case ONLYA: return (0); case ONLYB: return (1); case BOTH: /* * tie - favor one with highest recent cpu utilization */ if (p2->p_estcpu > p1->p_estcpu) return (1); if (p1->p_estcpu > p2->p_estcpu) return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * weed out zombies */ switch (TESTAB(p1->p_stat == SZOMB, p2->p_stat == SZOMB)) { case ONLYA: return (1); case ONLYB: return (0); case BOTH: return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * pick the one with the smallest sleep time */ if (p2->p_slptime > p1->p_slptime) return (0); if (p1->p_slptime > p2->p_slptime) return (1); /* * favor one sleeping in a non-interruptible sleep */ if (p1->p_flag & P_SINTR && (p2->p_flag & P_SINTR) == 0) return (1); if (p2->p_flag & P_SINTR && (p1->p_flag & P_SINTR) == 0) return (0); return (p2->p_pid > p1->p_pid); /* tie - return highest pid */ } /* * Output char to tty; console putchar style. 
*/ int tputchar(c, tp) int c; struct tty *tp; { register int s; s = spltty(); if (!ISSET(tp->t_state, TS_CONNECTED)) { splx(s); return (-1); } if (c == '\n') (void)ttyoutput('\r', tp); (void)ttyoutput(c, tp); ttstart(tp); splx(s); return (0); } /* * Sleep on chan, returning ERESTART if tty changed while we napped and * returning any errors (e.g. EINTR/EWOULDBLOCK) reported by tsleep. If * the tty is revoked, restarting a pending call will redo validation done * at the start of the call. */ int ttysleep(tp, chan, pri, wmesg, timo) struct tty *tp; void *chan; int pri, timo; char *wmesg; { int error; int gen; gen = tp->t_gen; error = tsleep(chan, pri, wmesg, timo); if (error) return (error); return (tp->t_gen == gen ? 0 : ERESTART); } #ifdef notyet /* * XXX this is usable but not useful or used. Most tty drivers have * ifdefs for using ttymalloc() but assume a different interface. */ /* * Allocate a tty struct. Clists in the struct will be allocated by * ttyopen(). */ struct tty * ttymalloc() { struct tty *tp; tp = malloc(sizeof *tp, M_TTYS, M_WAITOK); bzero(tp, sizeof *tp); return (tp); } #endif #if 0 /* XXX not yet usable: session leader holds a ref (see kern_exit.c). */ /* * Free a tty struct. Clists in the struct should have been freed by * ttyclose(). */ void ttyfree(tp) struct tty *tp; { free(tp, M_TTYS); } #endif /* 0 */ Index: head/sys/kern/uipc_sockbuf.c =================================================================== --- head/sys/kern/uipc_sockbuf.c (revision 41085) +++ head/sys/kern/uipc_sockbuf.c (revision 41086) @@ -1,961 +1,957 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_socket2.c,v 1.39 1998/09/05 13:24:39 bde Exp $ + * $Id: uipc_socket2.c,v 1.40 1998/11/04 20:22:11 fenner Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Primitive routines for operating on sockets and socket buffers */ u_long sb_max = SB_MAX; /* XXX should be static */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the * active (originating) side is that soisconnecting() is * called during processing of connect() call, * resulting in an eventual call to soisconnected() if/when the * connection is established. When the connection is torn down * soisdisconnecting() is called during processing of disconnect() call, * and soisdisconnected() is called when the connection to the peer * is totally severed. The semantics of these routines are such that * connectionless protocols can call soisconnected() and soisdisconnected() * only, bypassing the in-progress calls when setting up a ``connection'' * takes no time. * * From the passive side, a socket is created with * two queues of sockets: so_incomp for connections in progress * and so_comp for connections already made and awaiting user acceptance. * As a protocol is preparing incoming connections, it creates a socket * structure queued on so_incomp by calling sonewconn(). When the connection * is established, soisconnected() is called, and transfers the * socket structure to so_comp, making it available to accept(). * * If a socket is closed with sockets on either * so_incomp or so_comp, these sockets are dropped. * * If higher level protocols are implemented in * the kernel, the wakeups done here will sometimes * cause software-interrupt process scheduling. */ void soisconnecting(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; } void soisconnected(so) register struct socket *so; { register struct socket *head = so->so_head; so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (head && (so->so_state & SS_INCOMP)) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_state &= ~SS_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; sorwakeup(head); wakeup_one(&head->so_timeo); } else { wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } } void soisdisconnecting(so) register struct socket *so; { so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } void soisdisconnected(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } /* * Return a random connection that hasn't been serviced yet and * is eligible for discard. There is a one in qlen chance that * we will return a null, saying that there are no droppable * requests. In this case, the protocol specific code should drop * the new request. This ensures fairness. * * This may be used in conjunction with protocol specific queue * congestion routines.
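* When its listen queue overflows, the TCP input path uses this along * roughly these lines (a sketch, not the exact code): * if ((sp = sodropablereq(head)) != NULL) * tcp_drop(sototcpcb(sp), ETIMEDOUT); * and otherwise drops the newly arrived request itself.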
*/ struct socket * sodropablereq(head) register struct socket *head; { register struct socket *so; unsigned int i, j, qlen; static int rnd; static struct timeval old_runtime; static unsigned int cur_cnt, old_cnt; struct timeval tv; getmicrouptime(&tv); if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { old_runtime = tv; old_cnt = cur_cnt / i; cur_cnt = 0; } so = TAILQ_FIRST(&head->so_incomp); if (!so) return (so); qlen = head->so_incqlen; if (++cur_cnt > qlen || old_cnt > qlen) { rnd = (314159 * rnd + 66329) & 0xffff; j = ((qlen + 1) * rnd) >> 16; while (j-- && so) so = TAILQ_NEXT(so, so_list); } return (so); } /* * When an attempt at a new connection is noted on a socket * which accepts connections, sonewconn is called. If the * connection is possible (subject to space constraints, etc.) * then we allocate a new structure, properly linked into the * data structure of the original socket, and return this. * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED. */ struct socket * sonewconn(head, connstatus) register struct socket *head; int connstatus; { register struct socket *so; if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); so = soalloc(0); if (so == NULL) return ((struct socket *)0); so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_proto = head->so_proto; so->so_timeo = head->so_timeo; - so->so_pgid = head->so_pgid; + fsetown(fgetown(head->so_sigio), &so->so_sigio); so->so_uid = head->so_uid; (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); return ((struct socket *)0); } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; } else { TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); so->so_state |= SS_INCOMP; head->so_incqlen++; } head->so_qlen++; if (connstatus) { sorwakeup(head); wakeup((caddr_t)&head->so_timeo); so->so_state |= connstatus; } return (so); } /* * Socantsendmore indicates that no more data will be sent on the * socket; it would normally be applied to a socket when the user * informs the system that no more data is to be sent, by the protocol * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data * will be received, and will normally be applied to the socket by a * protocol when it detects that the peer will send no more data. * Data queued for reading in the socket may yet be read. */ void socantsendmore(so) struct socket *so; { so->so_state |= SS_CANTSENDMORE; sowwakeup(so); } void socantrcvmore(so) struct socket *so; { so->so_state |= SS_CANTRCVMORE; sorwakeup(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(sb) struct sockbuf *sb; { sb->sb_flags |= SB_WAIT; return (tsleep((caddr_t)&sb->sb_cc, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo)); } /* * Lock a sockbuf already known to be locked; * return any error returned from sleep (EINTR). */ int sb_lock(sb) register struct sockbuf *sb; { int error; while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; error = tsleep((caddr_t)&sb->sb_flags, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, "sblock", 0); if (error) return (error); } sb->sb_flags |= SB_LOCK; return (0); } /* * Wakeup processes waiting on a socket buffer. * Do asynchronous notification via SIGIO * if the socket has the SS_ASYNC flag set.
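* The owner is now recorded in a struct sigio (so_sigio) rather than in * so_pgid. From userland the registration is unchanged; an illustrative * sequence for a socket s is: * fcntl(s, F_SETOWN, getpid()); * fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_ASYNC); * after which readiness changes deliver SIGIO via pgsigio() below.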
*/ void sowakeup(so, sb) register struct socket *so; register struct sockbuf *sb; { struct proc *p; selwakeup(&sb->sb_sel); sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup((caddr_t)&sb->sb_cc); } - if (so->so_state & SS_ASYNC) { - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGIO); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGIO); - } + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) + pgsigio(so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and * one for receiving data. Each buffer contains a queue of mbufs, * information about the number of mbufs and amount of data in the * queue, and other fields allowing select() statements and notification * on data availability to be implemented. * * Data stored in a socket buffer is maintained as a list of records. * Each record is a list of mbufs chained together with the m_next * field. Records are chained together with the m_nextpkt field. The upper * level routine soreceive() expects the following conventions to be * observed when placing information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's * name, then a record containing that name must be present before * any associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really * just additional data associated with the message), and there are * ``rights'' to be received, then a record containing this data * should be present (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by * a data record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space * should be released by calling sbrelease() when the socket is destroyed. */ int soreserve(so, sndcc, rcvcc) register struct socket *so; u_long sndcc, rcvcc; { if (sbreserve(&so->so_snd, sndcc) == 0) goto bad; if (sbreserve(&so->so_rcv, rcvcc) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; return (0); bad2: sbrelease(&so->so_snd); bad: return (ENOBUFS); } /* * Allot mbufs to a sockbuf. * Attempt to scale mbmax so that mbcnt doesn't become limiting * if buffering efficiency is near the normal case. */ int sbreserve(sb, cc) struct sockbuf *sb; u_long cc; { if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) return (0); sb->sb_hiwat = cc; sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); } /* * Free mbufs held by a socket, and reserved mbuf space. */ void sbrelease(sb) struct sockbuf *sb; { sbflush(sb); sb->sb_hiwat = sb->sb_mbmax = 0; } /* * Routines to add and remove * data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to * append new mbufs to a socket buffer, after checking that adequate * space is available, comparing the function sbspace() with the amount * of data to be added. 
sbappendrecord() differs from sbappend() in * that data supplied is treated as the beginning of a new record. * To place a sender's address, optional access rights, and data in a * socket receive buffer, sbappendaddr() should be used. To place * access rights and data in a socket receive buffer, sbappendcontrol() * should be used. In either case, the new data begins a new record. * Note that unlike sbappend() and sbappendrecord(), these routines check * for the caller that there will be enough space to store the data. * Each fails if there is not enough space, or if it cannot find mbufs * to store additional information in. * * Reliable protocols may use the socket send buffer to hold data * awaiting acknowledgement. Data is normally copied from a socket * send buffer in a protocol with m_copy for output to a peer, * and then removed from the socket buffer with sbdrop() * or sbdroprecord() when the data is acknowledged by the peer. */ /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated with * the mbuf chain is recorded in sb. Empty mbufs are * discarded and mbufs are compacted where possible. */ void sbappend(sb, m) struct sockbuf *sb; struct mbuf *m; { register struct mbuf *n; if (m == 0) return; n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } sbcompress(sb, m, n); } #ifdef SOCKBUF_DEBUG void sbcheck(sb) register struct sockbuf *sb; { register struct mbuf *m; register struct mbuf *n = 0; register u_long len = 0, mbcnt = 0; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { len += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } } #endif /* * As above, except the mbuf chain * begins a new record. */ void sbappendrecord(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; if (m0 == 0) return; m = sb->sb_mb; if (m) while (m->m_nextpkt) m = m->m_nextpkt; /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); if (m) m->m_nextpkt = m0; else sb->sb_mb = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. */ void sbinsertoob(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { m = *mp; again: switch (m->m_type) { case MT_OOBDATA: continue; /* WANT next train */ case MT_CONTROL: m = m->m_next; if (m) goto again; /* inspect THIS train further */ } break; } /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); m0->m_nextpkt = *mp; *mp = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs.
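* A datagram protocol's input path uses it roughly as follows (a sketch * modelled on udp_input(); the names are illustrative): * if (sbappendaddr(&so->so_rcv, (struct sockaddr *)&from, m, opts) == 0) * goto bad; * sorwakeup(so);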
*/ int sbappendaddr(sb, asa, m0, control) register struct sockbuf *sb; struct sockaddr *asa; struct mbuf *m0, *control; { register struct mbuf *m, *n; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); if (m0) space += m0->m_pkthdr.len; for (n = control; n; n = n->m_next) { space += n->m_len; if (n->m_next == 0) /* keep pointer to last control buf */ break; } if (space > sbspace(sb)) return (0); if (asa->sa_len > MLEN) return (0); MGET(m, M_DONTWAIT, MT_SONAME); if (m == 0) return (0); m->m_len = asa->sa_len; bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); if (n) n->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n; n = n->m_next) sballoc(sb, n); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = m; } else sb->sb_mb = m; return (1); } int sbappendcontrol(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { register struct mbuf *m, *n; int space = 0; if (control == 0) panic("sbappendcontrol"); for (m = control; ; m = m->m_next) { space += m->m_len; if (m->m_next == 0) break; } n = m; /* save pointer to last control buffer */ for (m = m0; m; m = m->m_next) space += m->m_len; if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ for (m = control; m; m = m->m_next) sballoc(sb, m); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = control; } else sb->sb_mb = control; return (1); } /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n * is null, the buffer is presumed empty. */ void sbcompress(sb, m, n) register struct sockbuf *sb; register struct mbuf *m, *n; { register int eor = 0; register struct mbuf *o; while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { m = m_free(m); continue; } if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && n->m_type == m->m_type) { bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_cc += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mb = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { if (n) n->m_flags |= eor; else printf("semi-panic: sbcompress\n"); } } /* * Free all mbufs in a sockbuf. * Check that all resources are reclaimed. */ void sbflush(sb) register struct sockbuf *sb; { if (sb->sb_flags & SB_LOCK) panic("sbflush: locked"); while (sb->sb_mbcnt && sb->sb_cc) sbdrop(sb, (int)sb->sb_cc); if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop(sb, len) register struct sockbuf *sb; register int len; { register struct mbuf *m, *mn; struct mbuf *next; next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { if (m == 0) { if (next == 0) panic("sbdrop"); m = next; next = m->m_nextpkt; continue; } if (m->m_len > len) { m->m_len -= len; m->m_data += len; sb->sb_cc -= len; break; } len -= m->m_len; sbfree(sb, m); MFREE(m, mn); m = mn; } while (m && m->m_len == 0) { sbfree(sb, m); MFREE(m, mn); m = mn; } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; } /* * Drop a record off the front of a sockbuf * and move the next record to the front. 
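* soreceive() uses this for atomic (PR_ATOMIC) protocols to discard the * unread remainder of a record, e.g. the tail of a datagram that did not * fit in the caller's buffer.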
*/ void sbdroprecord(sb) register struct sockbuf *sb; { register struct mbuf *m, *mn; m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); MFREE(m, mn); m = mn; } while (m); } } /* * Create a "control" mbuf containing the specified data * with the specified type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(p, size, type, level) caddr_t p; register int size; int type, level; { register struct cmsghdr *cp; struct mbuf *m; if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); /* XXX check size? */ (void)memcpy(CMSG_DATA(cp), p, size); size += sizeof(*cp); m->m_len = size; cp->cmsg_len = size; cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. */ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, struct proc *p) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } /* * This isn't really a ``null'' operation, but it's the default one * and doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * dup_sockaddr(sa, canwait) struct sockaddr *sa; int canwait; { struct sockaddr *sa2; MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, canwait ? M_WAITOK : M_NOWAIT); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Create an external-format (``xsocket'') structure using the information * in the kernel-format socket structure pointed to by so. This is done * to reduce the spew of irrelevant information over this interface, * to isolate user code from changes in the kernel structure, and * potentially to provide information-hiding if we decide that * some of this information should be hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; - xso->so_pgid = so->so_pgid; + xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_uid = so->so_uid; } /* * This does the same for sockbufs. Note that the xsockbuf structure, * since it is always embedded in a socket, does not include a self * pointer nor a length. We make this entry point public in case * some other mechanism needs it. 
*/ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_cc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, ""); Index: head/sys/kern/uipc_socket.c =================================================================== --- head/sys/kern/uipc_socket.c (revision 41085) +++ head/sys/kern/uipc_socket.c (revision 41086) @@ -1,1224 +1,1223 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 - * $Id: uipc_socket.c,v 1.44 1998/08/31 15:34:55 wollman Exp $ + * $Id: uipc_socket.c,v 1.45 1998/08/31 18:07:23 wollman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct vm_zone *socket_zone; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); static int somaxconn = SOMAXCONN; SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, ""); /* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. * We don't implement `waitok' yet (see comments in uipc_domain.c). * Note that it would probably be better to allocate socket * and PCB at the same time, but I'm not convinced that all * the protocols can be easily modified to do this. */ struct socket * soalloc(waitok) int waitok; { struct socket *so; so = zalloci(socket_zone); if (so) { /* XXX race condition for reentrant kernel */ bzero(so, sizeof *so); so->so_gencnt = ++so_gencnt; so->so_zone = socket_zone; } return so; } int socreate(dom, aso, type, proto, p) int dom; struct socket **aso; register int type; int proto; struct proc *p; { register struct protosw *prp; register struct socket *so; register int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); so = soalloc(p != 0); if (so == 0) return (ENOBUFS); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; if (p != 0) so->so_uid = p->p_ucred->cr_uid; so->so_proto = prp; error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); return (error); } *aso = so; return (0); } int sobind(so, nam, p) struct socket *so; struct sockaddr *nam; struct proc *p; { int s = splnet(); int error; error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); splx(s); return (error); } void sodealloc(so) struct socket *so; { so->so_gencnt = ++so_gencnt; zfreei(so->so_zone, so); } int solisten(so, backlog, p) register struct socket *so; int backlog; struct proc *p; { int s, error; s = splnet(); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); if (error) { splx(s); return (error); } if (so->so_comp.tqh_first == NULL) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; splx(s); return (0); } void sofree(so) register struct socket *so; { struct socket *head = so->so_head; if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (head != NULL) { if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; } else if (so->so_state & SS_COMP) { TAILQ_REMOVE(&head->so_comp, so, so_list); } else { panic("sofree: not queued"); } head->so_qlen--; so->so_state &= ~(SS_INCOMP|SS_COMP); so->so_head = NULL; } sbrelease(&so->so_snd); sorflush(so); sodealloc(so); } /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. * Free socket when disconnect complete. 
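* As of this revision the close path also calls funsetown() first, so that * the socket's SIGIO registration is torn down before the last reference * goes away and no signal can be delivered to a stale owner.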
*/ int soclose(so) register struct socket *so; { int s = splnet(); /* conservative */ int error = 0; + funsetown(so->so_sigio); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp, *sonext; for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } } if (so->so_pcb == 0) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) goto drop; } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger); if (error) break; } } } drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; } discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; sofree(so); splx(s); return (error); } /* * Must be called at splnet... */ int soabort(so) struct socket *so; { return (*so->so_proto->pr_usrreqs->pru_abort)(so); } int soaccept(so, nam) register struct socket *so; struct sockaddr **nam; { int s = splnet(); int error; if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); splx(s); return (error); } int soconnect(so, nam, p) register struct socket *so; struct sockaddr *nam; struct proc *p; { int s; int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splnet(); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. * This allows user to disconnect by connecting to, e.g., * a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) error = EISCONN; else error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); splx(s); return (error); } int soconnect2(so1, so2) register struct socket *so1; struct socket *so2; { int s = splnet(); int error; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); splx(s); return (error); } int sodisconnect(so) register struct socket *so; { int s = splnet(); int error; if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto bad; } if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; goto bad; } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); bad: splx(s); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) /* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. * The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. * * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. 
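* An in-kernel caller typically wraps its buffer in a uio and lets sosend() * do the segmentation; a minimal sketch with error handling elided: * struct iovec iov; struct uio auio; * iov.iov_base = buf; iov.iov_len = len; * auio.uio_iov = &iov; auio.uio_iovcnt = 1; * auio.uio_offset = 0; auio.uio_resid = len; * auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; * auio.uio_procp = p; * error = sosend(so, (struct sockaddr *)0, &auio, (struct mbuf *)0, * (struct mbuf *)0, 0, p);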
*/ int sosend(so, addr, uio, top, control, flags, p) register struct socket *so; struct sockaddr *addr; struct uio *uio; struct mbuf *top; struct mbuf *control; int flags; struct proc *p; { struct mbuf **mp; register struct mbuf *m; register long space, len, resid; int clen = 0, error, s, dontroute, mlen; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (p) p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } restart: error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' are allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? ENOTCONN : EDESTADDRREQ); } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); if (space < resid + clen && uio && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) goto out; goto restart; } splx(s); mp = &top; space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (top == 0) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } if (resid >= MINCLSIZE) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; len = min(min(mlen, resid), space); } else { nopages: len = min(min(mlen, resid), space); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; s = splnet(); /* XXX */ error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol * understands this flag and there is nothing left to * send, then use PRU_SEND_EOF instead of PRU_SEND.
*/ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : 0, top, addr, control, p); splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top) m_freem(top); if (control) m_freem(control); return (error); } /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network interrupts for the entire time here, * we splx() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. */ int soreceive(so, psa, uio, mp0, controlp, flagsp) register struct socket *so; struct sockaddr **psa; struct uio *uio; struct mbuf **mp0; struct mbuf **controlp; int *flagsp; { register struct mbuf *m, **mp; register int flags, len, error, s, offset; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; int orig_resid = uio->uio_resid; mp = mp0; if (psa) *psa = 0; if (controlp) *controlp = 0; if (flagsp) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) { m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, caddr_t), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m) m_freem(m); return (error); } if (mp) *mp = (struct mbuf *)0; if (so->so_state & SS_ISCONFIRMING && uio->uio_resid) (*pr->pr_usrreqs->pru_rcvd)(so, 0); restart: error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); s = splnet(); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more * (subject to any timeout) if: * 1. the current count is less than the low water mark, or * 2. MSG_WAITALL is set, and it is possible to do the entire * receive operation at once if we block (resid <= hiwat). * 3. MSG_DONTWAIT is not set * If MSG_WAITALL is set but resid is larger than the receive buffer, * we have to do the receive in sections, and thus risk returning * a short count if a timeout or signal occurs after we start. 
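[Aside: sosend()'s contract above says callers must check for short counts when EINTR/ERESTART interrupts a transfer. The conventional userland counterpart is a retry loop; a sketch under that contract, with the helper name invented here:]

#include <sys/types.h>
#include <sys/socket.h>
#include <errno.h>

/*
 * send() can return a short count or fail with EINTR after moving
 * some data, so a persistent writer retries from the unsent offset.
 */
static ssize_t
send_all(int s, const char *buf, size_t len)
{
	size_t off = 0;
	ssize_t n;

	while (off < len) {
		n = send(s, buf + off, len - off, 0);
		if (n == -1) {
			if (errno == EINTR)
				continue;	/* signal: retry remainder */
			return (-1);		/* hard error */
		}
		off += (size_t)n;
	}
	return ((ssize_t)off);
}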
*/ if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { #ifdef DIAGNOSTIC if (m == 0 && so->so_rcv.sb_cc) panic("receive 1"); #endif if (so->so_error) { if (m) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; goto release; } if (so->so_state & SS_CANTRCVMORE) { if (m) goto dontblock; else goto release; } for (; m; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED)) { error = ENOTCONN; goto release; } if (uio->uio_resid == 0) goto release; if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) { error = EWOULDBLOCK; goto release; } sbunlock(&so->so_rcv); error = sbwait(&so->so_rcv); splx(s); if (error) return (error); goto restart; } dontblock: if (uio->uio_procp) uio->uio_procp->p_stats->p_ru.ru_msgrcv++; nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { #ifdef DIAGNOSTIC if (m->m_type != MT_SONAME) panic("receive 1a"); #endif orig_resid = 0; if (psa) *psa = dup_sockaddr(mtod(m, struct sockaddr *), mp0 == 0); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } while (m && m->m_type == MT_CONTROL && error == 0) { if (flags & MSG_PEEK) { if (controlp) *controlp = m_copy(m, 0, m->m_len); m = m->m_next; } else { sbfree(&so->so_rcv, m); if (controlp) { if (pr->pr_domain->dom_externalize && mtod(m, struct cmsghdr *)->cmsg_type == SCM_RIGHTS) error = (*pr->pr_domain->dom_externalize)(m); *controlp = m; so->so_rcv.sb_mb = m->m_next; m->m_next = 0; m = so->so_rcv.sb_mb; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } } if (controlp) { orig_resid = 0; controlp = &(*controlp)->m_next; } } if (m) { if ((flags & MSG_PEEK) == 0) m->m_nextpkt = nextrecord; type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } moff = 0; offset = 0; while (m && uio->uio_resid > 0 && error == 0) { if (m->m_type == MT_OOBDATA) { if (type != MT_OOBDATA) break; } else if (type == MT_OOBDATA) break; #ifdef DIAGNOSTIC else if (m->m_type != MT_DATA && m->m_type != MT_HEADER) panic("receive 3"); #endif so->so_state &= ~SS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. * Otherwise copy them out via the uio, then free. * Sockbuf must be consistent here (points to current mbuf, * it points to next record) when we drop priority; * we must note any additions to the sockbuf when we * block interrupts again. 
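[Aside: the record layout soreceive() parses above — an optional MT_SONAME mbuf, then MT_CONTROL mbufs, then data — is exactly what recvmsg(2) exposes to userland. A hedged sketch of receiving one such record; struct sockaddr_storage is used only for generality and the helper name is invented:]

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/*
 * msg_name is filled from the MT_SONAME mbuf, msg_control from the
 * MT_CONTROL mbufs, and the iovec from the data mbufs; flags such
 * as MSG_TRUNC come back in msg_flags.
 */
static ssize_t
recv_record(int s, void *data, size_t datalen,
    struct sockaddr_storage *from, void *cbuf, size_t clen)
{
	struct msghdr msg;
	struct iovec iov;

	memset(&msg, 0, sizeof msg);
	iov.iov_base = data;
	iov.iov_len = datalen;
	msg.msg_name = from;
	msg.msg_namelen = sizeof *from;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = clen;
	return (recvmsg(s, &msg, 0));
}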
*/ if (mp == 0) { splx(s); error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); s = splnet(); if (error) goto release; } else uio->uio_resid -= len; if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp) { *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = (struct mbuf *)0; } else { MFREE(m, so->so_rcv.sb_mb); m = so->so_rcv.sb_mb; } if (m) m->m_nextpkt = nextrecord; } } else { if (flags & MSG_PEEK) moff += len; else { if (mp) *mp = m_copym(m, 0, len, M_WAIT); m->m_data += len; m->m_len -= len; so->so_rcv.sb_cc -= len; } } if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_state |= SS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), * we must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return * with a short count but without error. * Keep sockbuf locked against other readers. */ while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 && !sosendallatonce(so) && !nextrecord) { if (so->so_error || so->so_state & SS_CANTRCVMORE) break; error = sbwait(&so->so_rcv); if (error) { sbunlock(&so->so_rcv); splx(s); return (0); } m = so->so_rcv.sb_mb; if (m) nextrecord = m->m_nextpkt; } } if (m && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == 0) so->so_rcv.sb_mb = nextrecord; if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) (*pr->pr_usrreqs->pru_rcvd)(so, flags); } if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { sbunlock(&so->so_rcv); splx(s); goto restart; } if (flagsp) *flagsp |= flags; release: sbunlock(&so->so_rcv); splx(s); return (error); } int soshutdown(so, how) register struct socket *so; register int how; { register struct protosw *pr = so->so_proto; how++; if (how & FREAD) sorflush(so); if (how & FWRITE) return ((*pr->pr_usrreqs->pru_shutdown)(so)); return (0); } void sorflush(so) register struct socket *so; { register struct sockbuf *sb = &so->so_rcv; register struct protosw *pr = so->so_proto; register int s; struct sockbuf asb; sb->sb_flags |= SB_NOINTR; (void) sblock(sb, M_WAITOK); s = splimp(); socantrcvmore(so); sbunlock(sb); asb = *sb; bzero((caddr_t)sb, sizeof (*sb)); splx(s); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease(&asb); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in * an additional variant to handle the case where the option value needs * to be some kind of integer, but not a specific size. * In addition to their use here, these functions are also called by the * protocol-level pr_ctloutput() routines. */ int sooptcopyin(sopt, buf, len, minlen) struct sockopt *sopt; void *buf; size_t len; size_t minlen; { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, * but if we don't get the minimum length the caller * wants, we return EINVAL. On success, sopt->sopt_valsize * is set to however much we actually retrieved. 
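[Aside: soshutdown() above maps the userland "how" argument through how++ onto the FREAD/FWRITE bits — 0, 1, 2 become 1 (read side), 2 (write side), 3 (both). A small sketch of the classic half-close that exercises the write-side path; the helper name is invented:]

#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

/*
 * Stop sending (pru_shutdown via FWRITE) but keep draining the
 * peer's data until it sees our FIN and closes in turn.
 */
static void
half_close(int s)
{
	char buf[512];
	ssize_t n;

	(void)shutdown(s, 1);	/* SHUT_WR: how++ == 2 == FWRITE */
	while ((n = read(s, buf, sizeof buf)) > 0)
		;		/* receive side stays open until EOF */
}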
*/ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_p != 0) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return 0; } int sosetopt(so, sopt) struct socket *so; struct sockopt *sopt; { int error, optval; struct linger l; struct timeval tv; short val; error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) return ((*so->so_proto->pr_ctloutput) (so, sopt)); error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_OOBINLINE: case SO_TIMESTAMP: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for any of these * options, so disallow them. */ if (optval < 1) { error = EINVAL; goto bad; } switch (sopt->sopt_name) { case SO_SNDBUF: case SO_RCVBUF: if (sbreserve(sopt->sopt_name == SO_SNDBUF ? &so->so_snd : &so->so_rcv, (u_long) optval) == 0) { error = ENOBUFS; goto bad; } break; /* * Make sure the low-water is never greater than * the high-water. */ case SO_SNDLOWAT: so->so_snd.sb_lowat = (optval > so->so_snd.sb_hiwat) ? so->so_snd.sb_hiwat : optval; break; case SO_RCVLOWAT: so->so_rcv.sb_lowat = (optval > so->so_rcv.sb_hiwat) ? so->so_rcv.sb_hiwat : optval; break; } break; case SO_SNDTIMEO: case SO_RCVTIMEO: error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec > SHRT_MAX / hz - hz) { error = EDOM; goto bad; } val = tv.tv_sec * hz + tv.tv_usec / tick; switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; default: error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { (void) ((*so->so_proto->pr_ctloutput) (so, sopt)); } } bad: return (error); } /* Helper routine for getsockopt */ int sooptcopyout(sopt, buf, len) struct sockopt *sopt; void *buf; size_t len; { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, * possibly truncated to fit in the user's buffer. * Traditional behavior is that we always tell the user * precisely how much we copied, rather than something useful * like the total amount we had available for her. * Note that this interface is not idempotent; the entire answer must * generated ahead of time. 
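[Aside: sosetopt() above converts the SO_SNDTIMEO/SO_RCVTIMEO timeval into scheduler ticks (tv_sec * hz + tv_usec / tick) and rejects anything that would overflow the short sb_timeo with EDOM rather than EINVAL. A userland sketch; the helper name is invented:]

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>

/*
 * The timeval set here must fit in the short sb_timeo once scaled
 * by hz, hence the SHRT_MAX / hz check in sosetopt().
 */
static int
set_rcv_timeout(int s, long sec, long usec)
{
	struct timeval tv;

	tv.tv_sec = sec;	/* too-large values fail with EDOM */
	tv.tv_usec = usec;
	return (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv));
}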
*/ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != 0) { if (sopt->sopt_p != 0) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return error; } int sogetopt(so, sopt) struct socket *so; struct sockopt *sopt; { int error, optval; struct linger l; struct timeval tv; error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto && so->so_proto->pr_ctloutput) { return ((*so->so_proto->pr_ctloutput) (so, sopt)); } else return (ENOPROTOOPT); } else { switch (sopt->sopt_name) { case SO_LINGER: l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_BROADCAST: case SO_OOBINLINE: case SO_TIMESTAMP: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_TYPE: optval = so->so_type; goto integer; case SO_ERROR: optval = so->so_error; so->so_error = 0; goto integer; case SO_SNDBUF: optval = so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: optval = (sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); tv.tv_sec = optval / hz; tv.tv_usec = (optval % hz) * tick; error = sooptcopyout(sopt, &tv, sizeof tv); break; default: error = ENOPROTOOPT; break; } return (error); } } void sohasoutofband(so) register struct socket *so; { struct proc *p; - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGURG); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGURG); + if (so->so_sigio != NULL) + pgsigio(so->so_sigio, SIGURG, 0); selwakeup(&so->so_rcv.sb_sel); } int sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p) { int revents = 0; int s = splnet(); if (events & (POLLIN | POLLRDNORM)) if (soreadable(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(p, &so->so_rcv.sb_sel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(p, &so->so_snd.sb_sel); so->so_snd.sb_flags |= SB_SEL; } } splx(s); return (revents); } Index: head/sys/kern/uipc_socket2.c =================================================================== --- head/sys/kern/uipc_socket2.c (revision 41085) +++ head/sys/kern/uipc_socket2.c (revision 41086) @@ -1,961 +1,957 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
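[Aside: the sohasoutofband() hunk above is the core of this change — SIGURG is now posted through the shared sigio record via pgsigio() instead of the old so_pgid bookkeeping. The userland recipe is unchanged: claim ownership, catch SIGURG, pull the out-of-band byte. A hedged sketch with invented names:]

#include <sys/types.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static int urgent_fd;

/* Kept trivial for illustration; real handlers defer the work. */
static void
on_urg(int sig)
{
	char c;

	(void)sig;
	(void)recv(urgent_fd, &c, 1, MSG_OOB);	/* fetch the mark byte */
}

/*
 * The process must own the socket (F_SETOWN feeds fsetown()) or
 * pgsigio() has no one to deliver SIGURG to.
 */
static int
watch_oob(int s)
{
	urgent_fd = s;
	if (signal(SIGURG, on_urg) == SIG_ERR)
		return (-1);
	return (fcntl(s, F_SETOWN, getpid()));
}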
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93 - * $Id: uipc_socket2.c,v 1.39 1998/09/05 13:24:39 bde Exp $ + * $Id: uipc_socket2.c,v 1.40 1998/11/04 20:22:11 fenner Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Primitive routines for operating on sockets and socket buffers */ u_long sb_max = SB_MAX; /* XXX should be static */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the * active (originating) side is that soisconnecting() is * called during processing of connect() call, * resulting in an eventual call to soisconnected() if/when the * connection is established. When the connection is torn down * soisdisconnecting() is called during processing of disconnect() call, * and soisdisconnected() is called when the connection to the peer * is totally severed. The semantics of these routines are such that * connectionless protocols can call soisconnected() and soisdisconnected() * only, bypassing the in-progress calls when setting up a ``connection'' * takes no time. * * From the passive side, a socket is created with * two queues of sockets: so_q0 for connections in progress * and so_q for connections already made and awaiting user acceptance. * As a protocol is preparing incoming connections, it creates a socket * structure queued on so_q0 by calling sonewconn(). When the connection * is established, soisconnected() is called, and transfers the * socket structure to so_q, making it available to accept(). * * If a socket is closed with sockets on either * so_q0 or so_q, these sockets are dropped. * * If higher level protocols are implemented in * the kernel, the wakeups done here will sometimes * cause software-interrupt process scheduling. 
*/ void soisconnecting(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; } void soisconnected(so) register struct socket *so; { register struct socket *head = so->so_head; so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (head && (so->so_state & SS_INCOMP)) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; so->so_state &= ~SS_INCOMP; TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; sorwakeup(head); wakeup_one(&head->so_timeo); } else { wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } } void soisdisconnecting(so) register struct socket *so; { so->so_state &= ~SS_ISCONNECTING; so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } void soisdisconnected(so) register struct socket *so; { so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); wakeup((caddr_t)&so->so_timeo); sowwakeup(so); sorwakeup(so); } /* * Return a random connection that hasn't been serviced yet and * is eligible for discard. There is a one in qlen chance that * we will return a null, saying that there are no dropable * requests. In this case, the protocol specific code should drop * the new request. This insures fairness. * * This may be used in conjunction with protocol specific queue * congestion routines. */ struct socket * sodropablereq(head) register struct socket *head; { register struct socket *so; unsigned int i, j, qlen; static int rnd; static struct timeval old_runtime; static unsigned int cur_cnt, old_cnt; struct timeval tv; getmicrouptime(&tv); if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) { old_runtime = tv; old_cnt = cur_cnt / i; cur_cnt = 0; } so = TAILQ_FIRST(&head->so_incomp); if (!so) return (so); qlen = head->so_incqlen; if (++cur_cnt > qlen || old_cnt > qlen) { rnd = (314159 * rnd + 66329) & 0xffff; j = ((qlen + 1) * rnd) >> 16; while (j-- && so) so = TAILQ_NEXT(so, so_list); } return (so); } /* * When an attempt at a new connection is noted on a socket * which accepts connections, sonewconn is called. If the * connection is possible (subject to space constraints, etc.) * then we allocate a new structure, propoerly linked into the * data structure of the original socket, and return this. * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED. 
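[Aside: sodropablereq() above picks its victim with a 16-bit linear congruential step scaled into [0, qlen]; landing past the end of the queue means "no victim, drop the new request", which is the one-in-qlen fairness property the comment describes. A self-contained restatement of just that arithmetic — not kernel code, and widened to unsigned so the multiply is well-defined:]

/*
 * Same LCG constants as sodropablereq(); returns an index in
 * [0, qlen], where qlen means "drop the incoming request instead".
 */
static unsigned int
pick_victim(unsigned int qlen, unsigned int *rnd)
{
	*rnd = (314159 * *rnd + 66329) & 0xffff;
	return (((qlen + 1) * *rnd) >> 16);
}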
*/ struct socket * sonewconn(head, connstatus) register struct socket *head; int connstatus; { register struct socket *so; if (head->so_qlen > 3 * head->so_qlimit / 2) return ((struct socket *)0); so = soalloc(0); if (so == NULL) return ((struct socket *)0); so->so_head = head; so->so_type = head->so_type; so->so_options = head->so_options &~ SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_proto = head->so_proto; so->so_timeo = head->so_timeo; - so->so_pgid = head->so_pgid; + fsetown(fgetown(head->so_sigio), &so->so_sigio); so->so_uid = head->so_uid; (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); return ((struct socket *)0); } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_state |= SS_COMP; } else { TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list); so->so_state |= SS_INCOMP; head->so_incqlen++; } head->so_qlen++; if (connstatus) { sorwakeup(head); wakeup((caddr_t)&head->so_timeo); so->so_state |= connstatus; } return (so); } /* * Socantsendmore indicates that no more data will be sent on the * socket; it would normally be applied to a socket when the user * informs the system that no more data is to be sent, by the protocol * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data * will be received, and will normally be applied to the socket by a * protocol when it detects that the peer will send no more data. * Data queued for reading in the socket may yet be read. */ void socantsendmore(so) struct socket *so; { so->so_state |= SS_CANTSENDMORE; sowwakeup(so); } void socantrcvmore(so) struct socket *so; { so->so_state |= SS_CANTRCVMORE; sorwakeup(so); } /* * Wait for data to arrive at/drain from a socket buffer. */ int sbwait(sb) struct sockbuf *sb; { sb->sb_flags |= SB_WAIT; return (tsleep((caddr_t)&sb->sb_cc, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait", sb->sb_timeo)); } /* * Lock a sockbuf already known to be locked; * return any error returned from sleep (EINTR). */ int sb_lock(sb) register struct sockbuf *sb; { int error; while (sb->sb_flags & SB_LOCK) { sb->sb_flags |= SB_WANT; error = tsleep((caddr_t)&sb->sb_flags, (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH, "sblock", 0); if (error) return (error); } sb->sb_flags |= SB_LOCK; return (0); } /* * Wakeup processes waiting on a socket buffer. * Do asynchronous notification via SIGIO * if the socket has the SS_ASYNC flag set. */ void sowakeup(so, sb) register struct socket *so; register struct sockbuf *sb; { struct proc *p; selwakeup(&sb->sb_sel); sb->sb_flags &= ~SB_SEL; if (sb->sb_flags & SB_WAIT) { sb->sb_flags &= ~SB_WAIT; wakeup((caddr_t)&sb->sb_cc); } - if (so->so_state & SS_ASYNC) { - if (so->so_pgid < 0) - gsignal(-so->so_pgid, SIGIO); - else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) - psignal(p, SIGIO); - } + if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL) + pgsigio(so->so_sigio, SIGIO, 0); if (sb->sb_flags & SB_UPCALL) (*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT); } /* * Socket buffer (struct sockbuf) utility routines. * * Each socket contains two socket buffers: one for sending data and * one for receiving data. Each buffer contains a queue of mbufs, * information about the number of mbufs and amount of data in the * queue, and other fields allowing select() statements and notification * on data availability to be implemented. 
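[Aside: sowakeup() above now posts SIGIO through pgsigio() when SS_ASYNC is set, and sonewconn() inherits the sigio ownership with fsetown(fgetown(...)). From userland both halves are needed — owning the descriptor alone does nothing until async mode is switched on. A sketch with an invented helper name:]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * F_SETOWN feeds fsetown() (who gets SIGIO); FIOASYNC sets SS_ASYNC
 * so sowakeup() actually consults the sigio record.
 */
static int
enable_sigio(int s)
{
	int on = 1;

	if (fcntl(s, F_SETOWN, getpid()) == -1)
		return (-1);
	return (ioctl(s, FIOASYNC, &on));
}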
* * Data stored in a socket buffer is maintained as a list of records. * Each record is a list of mbufs chained together with the m_next * field. Records are chained together with the m_nextpkt field. The upper * level routine soreceive() expects the following conventions to be * observed when placing information in the receive buffer: * * 1. If the protocol requires each message be preceded by the sender's * name, then a record containing that name must be present before * any associated data (mbuf's must be of type MT_SONAME). * 2. If the protocol supports the exchange of ``access rights'' (really * just additional data associated with the message), and there are * ``rights'' to be received, then a record containing this data * should be present (mbuf's must be of type MT_RIGHTS). * 3. If a name or rights record exists, then it must be followed by * a data record, perhaps of zero length. * * Before using a new socket structure it is first necessary to reserve * buffer space to the socket, by calling sbreserve(). This should commit * some of the available buffer space in the system buffer pool for the * socket (currently, it does nothing but enforce limits). The space * should be released by calling sbrelease() when the socket is destroyed. */ int soreserve(so, sndcc, rcvcc) register struct socket *so; u_long sndcc, rcvcc; { if (sbreserve(&so->so_snd, sndcc) == 0) goto bad; if (sbreserve(&so->so_rcv, rcvcc) == 0) goto bad2; if (so->so_rcv.sb_lowat == 0) so->so_rcv.sb_lowat = 1; if (so->so_snd.sb_lowat == 0) so->so_snd.sb_lowat = MCLBYTES; if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) so->so_snd.sb_lowat = so->so_snd.sb_hiwat; return (0); bad2: sbrelease(&so->so_snd); bad: return (ENOBUFS); } /* * Allot mbufs to a sockbuf. * Attempt to scale mbmax so that mbcnt doesn't become limiting * if buffering efficiency is near the normal case. */ int sbreserve(sb, cc) struct sockbuf *sb; u_long cc; { if ((u_quad_t)cc > (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES)) return (0); sb->sb_hiwat = cc; sb->sb_mbmax = min(cc * sb_efficiency, sb_max); if (sb->sb_lowat > sb->sb_hiwat) sb->sb_lowat = sb->sb_hiwat; return (1); } /* * Free mbufs held by a socket, and reserved mbuf space. */ void sbrelease(sb) struct sockbuf *sb; { sbflush(sb); sb->sb_hiwat = sb->sb_mbmax = 0; } /* * Routines to add and remove * data from an mbuf queue. * * The routines sbappend() or sbappendrecord() are normally called to * append new mbufs to a socket buffer, after checking that adequate * space is available, comparing the function sbspace() with the amount * of data to be added. sbappendrecord() differs from sbappend() in * that data supplied is treated as the beginning of a new record. * To place a sender's address, optional access rights, and data in a * socket receive buffer, sbappendaddr() should be used. To place * access rights and data in a socket receive buffer, sbappendrights() * should be used. In either case, the new data begins a new record. * Note that unlike sbappend() and sbappendrecord(), these routines check * for the caller that there will be enough space to store the data. * Each fails if there is not enough space, or if it cannot find mbufs * to store additional information in. * * Reliable protocols may use the socket send buffer to hold data * awaiting acknowledgement. Data is normally copied from a socket * send buffer in a protocol with m_copy for output to a peer, * and then removing the data from the socket buffer with sbdrop() * or sbdroprecord() when the data is acknowledged by the peer. 
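[Aside: sbreserve() below caps any reservation at sb_max scaled by MCLBYTES / (MSIZE + MCLBYTES), charging for mbuf overhead. Purely illustrative arithmetic; MSIZE = 256 and MCLBYTES = 2048 are assumptions matching common configurations of the era — the real values come from <sys/param.h>:]

/*
 * The hard ceiling sbreserve() applies to sb_hiwat.
 * E.g. sb_max = 262144 yields a limit of 233016 bytes.
 */
static unsigned long
sb_reserve_limit(unsigned long sb_max_val)
{
	const unsigned long long msize = 256, mclbytes = 2048;

	return ((unsigned long)(sb_max_val * mclbytes / (msize + mclbytes)));
}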
*/ /* * Append mbuf chain m to the last record in the * socket buffer sb. The additional space associated * the mbuf chain is recorded in sb. Empty mbufs are * discarded and mbufs are compacted where possible. */ void sbappend(sb, m) struct sockbuf *sb; struct mbuf *m; { register struct mbuf *n; if (m == 0) return; n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; do { if (n->m_flags & M_EOR) { sbappendrecord(sb, m); /* XXXXXX!!!! */ return; } } while (n->m_next && (n = n->m_next)); } sbcompress(sb, m, n); } #ifdef SOCKBUF_DEBUG void sbcheck(sb) register struct sockbuf *sb; { register struct mbuf *m; register struct mbuf *n = 0; register u_long len = 0, mbcnt = 0; for (m = sb->sb_mb; m; m = n) { n = m->m_nextpkt; for (; m; m = m->m_next) { len += m->m_len; mbcnt += MSIZE; if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */ mbcnt += m->m_ext.ext_size; } } if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { printf("cc %ld != %ld || mbcnt %ld != %ld\n", len, sb->sb_cc, mbcnt, sb->sb_mbcnt); panic("sbcheck"); } } #endif /* * As above, except the mbuf chain * begins a new record. */ void sbappendrecord(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; if (m0 == 0) return; m = sb->sb_mb; if (m) while (m->m_nextpkt) m = m->m_nextpkt; /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); if (m) m->m_nextpkt = m0; else sb->sb_mb = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * As above except that OOB data * is inserted at the beginning of the sockbuf, * but after any other OOB data. */ void sbinsertoob(sb, m0) register struct sockbuf *sb; register struct mbuf *m0; { register struct mbuf *m; register struct mbuf **mp; if (m0 == 0) return; for (mp = &sb->sb_mb; *mp ; mp = &((*mp)->m_nextpkt)) { m = *mp; again: switch (m->m_type) { case MT_OOBDATA: continue; /* WANT next train */ case MT_CONTROL: m = m->m_next; if (m) goto again; /* inspect THIS train further */ } break; } /* * Put the first mbuf on the queue. * Note this permits zero length records. */ sballoc(sb, m0); m0->m_nextpkt = *mp; *mp = m0; m = m0->m_next; m0->m_next = 0; if (m && (m0->m_flags & M_EOR)) { m0->m_flags &= ~M_EOR; m->m_flags |= M_EOR; } sbcompress(sb, m, m0); } /* * Append address and data, and optionally, control (ancillary) data * to the receive queue of a socket. If present, * m0 must include a packet header with total length. * Returns 0 if no space in sockbuf or insufficient mbufs. 
*/ int sbappendaddr(sb, asa, m0, control) register struct sockbuf *sb; struct sockaddr *asa; struct mbuf *m0, *control; { register struct mbuf *m, *n; int space = asa->sa_len; if (m0 && (m0->m_flags & M_PKTHDR) == 0) panic("sbappendaddr"); if (m0) space += m0->m_pkthdr.len; for (n = control; n; n = n->m_next) { space += n->m_len; if (n->m_next == 0) /* keep pointer to last control buf */ break; } if (space > sbspace(sb)) return (0); if (asa->sa_len > MLEN) return (0); MGET(m, M_DONTWAIT, MT_SONAME); if (m == 0) return (0); m->m_len = asa->sa_len; bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); if (n) n->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; for (n = m; n; n = n->m_next) sballoc(sb, n); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = m; } else sb->sb_mb = m; return (1); } int sbappendcontrol(sb, m0, control) struct sockbuf *sb; struct mbuf *control, *m0; { register struct mbuf *m, *n; int space = 0; if (control == 0) panic("sbappendcontrol"); for (m = control; ; m = m->m_next) { space += m->m_len; if (m->m_next == 0) break; } n = m; /* save pointer to last control buffer */ for (m = m0; m; m = m->m_next) space += m->m_len; if (space > sbspace(sb)) return (0); n->m_next = m0; /* concatenate data to control */ for (m = control; m; m = m->m_next) sballoc(sb, m); n = sb->sb_mb; if (n) { while (n->m_nextpkt) n = n->m_nextpkt; n->m_nextpkt = control; } else sb->sb_mb = control; return (1); } /* * Compress mbuf chain m into the socket * buffer sb following mbuf n. If n * is null, the buffer is presumed empty. */ void sbcompress(sb, m, n) register struct sockbuf *sb; register struct mbuf *m, *n; { register int eor = 0; register struct mbuf *o; while (m) { eor |= m->m_flags & M_EOR; if (m->m_len == 0 && (eor == 0 || (((o = m->m_next) || (o = n)) && o->m_type == m->m_type))) { m = m_free(m); continue; } if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && n->m_type == m->m_type) { bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, (unsigned)m->m_len); n->m_len += m->m_len; sb->sb_cc += m->m_len; m = m_free(m); continue; } if (n) n->m_next = m; else sb->sb_mb = m; sballoc(sb, m); n = m; m->m_flags &= ~M_EOR; m = m->m_next; n->m_next = 0; } if (eor) { if (n) n->m_flags |= eor; else printf("semi-panic: sbcompress\n"); } } /* * Free all mbufs in a sockbuf. * Check that all resources are reclaimed. */ void sbflush(sb) register struct sockbuf *sb; { if (sb->sb_flags & SB_LOCK) panic("sbflush: locked"); while (sb->sb_mbcnt && sb->sb_cc) sbdrop(sb, (int)sb->sb_cc); if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt) panic("sbflush: cc %ld || mb %p || mbcnt %ld", sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt); } /* * Drop data from (the front of) a sockbuf. */ void sbdrop(sb, len) register struct sockbuf *sb; register int len; { register struct mbuf *m, *mn; struct mbuf *next; next = (m = sb->sb_mb) ? m->m_nextpkt : 0; while (len > 0) { if (m == 0) { if (next == 0) panic("sbdrop"); m = next; next = m->m_nextpkt; continue; } if (m->m_len > len) { m->m_len -= len; m->m_data += len; sb->sb_cc -= len; break; } len -= m->m_len; sbfree(sb, m); MFREE(m, mn); m = mn; } while (m && m->m_len == 0) { sbfree(sb, m); MFREE(m, mn); m = mn; } if (m) { sb->sb_mb = m; m->m_nextpkt = next; } else sb->sb_mb = next; } /* * Drop a record off the front of a sockbuf * and move the next record to the front. 
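[Aside: sbdrop() above trims len bytes off the front of a buffer chain, freeing mbufs it consumes entirely and advancing the first survivor in place (m_data += len). A standalone sketch of the same front-trim over toy node types — ownership of the payload storage is elided for brevity:]

#include <stdlib.h>

struct chunk {
	struct chunk *next;
	char *data;
	int len;
};

/*
 * Free whole nodes while len covers them (analogous to
 * sbfree() + MFREE()), then shave the partially consumed one.
 */
static struct chunk *
drop_front(struct chunk *head, int len)
{
	struct chunk *next;

	while (head != NULL && len >= head->len) {
		len -= head->len;
		next = head->next;
		free(head);
		head = next;
	}
	if (head != NULL && len > 0) {
		head->data += len;	/* like m_data += len */
		head->len -= len;
	}
	return (head);
}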
*/ void sbdroprecord(sb) register struct sockbuf *sb; { register struct mbuf *m, *mn; m = sb->sb_mb; if (m) { sb->sb_mb = m->m_nextpkt; do { sbfree(sb, m); MFREE(m, mn); m = mn; } while (m); } } /* * Create a "control" mbuf containing the specified data * with the specified type for presentation on a socket buffer. */ struct mbuf * sbcreatecontrol(p, size, type, level) caddr_t p; register int size; int type, level; { register struct cmsghdr *cp; struct mbuf *m; if ((m = m_get(M_DONTWAIT, MT_CONTROL)) == NULL) return ((struct mbuf *) NULL); cp = mtod(m, struct cmsghdr *); /* XXX check size? */ (void)memcpy(CMSG_DATA(cp), p, size); size += sizeof(*cp); m->m_len = size; cp->cmsg_len = size; cp->cmsg_level = level; cp->cmsg_type = type; return (m); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. */ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct proc *p) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, struct proc *p) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } /* * This isn't really a ``null'' operation, but it's the default one * and doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * dup_sockaddr(sa, canwait) struct sockaddr *sa; int canwait; { struct sockaddr *sa2; MALLOC(sa2, struct sockaddr *, sa->sa_len, M_SONAME, canwait ? M_WAITOK : M_NOWAIT); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Create an external-format (``xsocket'') structure using the information * in the kernel-format socket structure pointed to by so. This is done * to reduce the spew of irrelevant information over this interface, * to isolate user code from changes in the kernel structure, and * potentially to provide information-hiding if we decide that * some of this information should be hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; xso->xso_so = so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_qlen = so->so_qlen; xso->so_incqlen = so->so_incqlen; xso->so_qlimit = so->so_qlimit; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; - xso->so_pgid = so->so_pgid; + xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); xso->so_uid = so->so_uid; } /* * This does the same for sockbufs. Note that the xsockbuf structure, * since it is always embedded in a socket, does not include a self * pointer nor a length. We make this entry point public in case * some other mechanism needs it. 
*/ void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb) { xsb->sb_cc = sb->sb_cc; xsb->sb_hiwat = sb->sb_hiwat; xsb->sb_mbcnt = sb->sb_mbcnt; xsb->sb_mbmax = sb->sb_mbmax; xsb->sb_lowat = sb->sb_lowat; xsb->sb_flags = sb->sb_flags; xsb->sb_timeo = sb->sb_timeo; } /* * Here is the definition of some of the basic objects in the kern.ipc * branch of the MIB. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */ static int dummy; SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLFLAG_RW, &sb_max, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, maxsockets, CTLFLAG_RD, &maxsockets, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW, &sb_efficiency, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD, &nmbclusters, 0, ""); Index: head/sys/net/bpf.c =================================================================== --- head/sys/net/bpf.c (revision 41085) +++ head/sys/net/bpf.c (revision 41086) @@ -1,1311 +1,1314 @@ /* * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)bpf.c 8.2 (Berkeley) 3/28/94 * - * $Id: bpf.c,v 1.43 1998/10/04 23:04:48 alex Exp $ + * $Id: bpf.c,v 1.44 1998/10/08 00:32:08 alex Exp $ */ #include "bpfilter.h" #if NBPFILTER > 0 #ifndef __GNUC__ #define inline #else #define inline __inline #endif #include #include #include #include #include #include #include #include #include #include #include +#include #if defined(sparc) && BSD < 199103 #include #endif #include #include #include #include #include #include #include #include #include #include #include "opt_devfs.h" #ifdef DEVFS #include #endif /*DEVFS*/ /* * Older BSDs don't have kernel malloc. */ #if BSD < 199103 extern bcopy(); static caddr_t bpf_alloc(); #include #define BPF_BUFSIZE (MCLBYTES-8) #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, code, uio) #else #define BPF_BUFSIZE 4096 #define UIOMOVE(cp, len, code, uio) uiomove(cp, len, uio) #endif #define PRINET 26 /* interruptible */ /* * The default read buffer size is patchable. */ static int bpf_bufsize = BPF_BUFSIZE; SYSCTL_INT(_debug, OID_AUTO, bpf_bufsize, CTLFLAG_RW, &bpf_bufsize, 0, ""); /* * bpf_iflist is the list of interfaces; each corresponds to an ifnet * bpf_dtab holds the descriptors, indexed by minor device # */ static struct bpf_if *bpf_iflist; static struct bpf_d bpf_dtab[NBPFILTER]; static int bpf_dtab_init; static int bpf_allocbufs __P((struct bpf_d *)); static void bpf_attachd __P((struct bpf_d *d, struct bpf_if *bp)); static void bpf_detachd __P((struct bpf_d *d)); static void bpf_freed __P((struct bpf_d *)); static void bpf_ifname __P((struct ifnet *, struct ifreq *)); static void bpf_mcopy __P((const void *, void *, size_t)); static int bpf_movein __P((struct uio *, int, struct mbuf **, struct sockaddr *, int *)); static int bpf_setif __P((struct bpf_d *, struct ifreq *)); static inline void bpf_wakeup __P((struct bpf_d *)); static void catchpacket __P((struct bpf_d *, u_char *, u_int, u_int, void (*)(const void *, void *, size_t))); static void reset_d __P((struct bpf_d *)); static int bpf_setf __P((struct bpf_d *, struct bpf_program *)); static d_open_t bpfopen; static d_close_t bpfclose; static d_read_t bpfread; static d_write_t bpfwrite; static d_ioctl_t bpfioctl; static d_poll_t bpfpoll; #define CDEV_MAJOR 23 static struct cdevsw bpf_cdevsw = { bpfopen, bpfclose, bpfread, bpfwrite, /*23*/ bpfioctl, nostop, nullreset, nodevtotty,/* bpf */ bpfpoll, nommap, NULL, "bpf", NULL, -1 }; static int bpf_movein(uio, linktype, mp, sockp, datlen) register struct uio *uio; int linktype, *datlen; register struct mbuf **mp; register struct sockaddr *sockp; { struct mbuf *m; int error; int len; int hlen; /* * Build a sockaddr based on the data link layer type. * We do this at this level because the ethernet header * is copied directly into the data field of the sockaddr. * In the case of SLIP, there is no header and the packet * is forwarded as is. * Also, we are careful to leave room at the front of the mbuf * for the link level header. */ switch (linktype) { case DLT_SLIP: sockp->sa_family = AF_INET; hlen = 0; break; case DLT_EN10MB: sockp->sa_family = AF_UNSPEC; /* XXX Would MAXLINKHDR be better? 
*/ hlen = sizeof(struct ether_header); break; case DLT_FDDI: #if defined(__FreeBSD__) || defined(__bsdi__) sockp->sa_family = AF_IMPLINK; hlen = 0; #else sockp->sa_family = AF_UNSPEC; /* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */ hlen = 24; #endif break; case DLT_RAW: case DLT_NULL: sockp->sa_family = AF_UNSPEC; hlen = 0; break; #ifdef __FreeBSD__ case DLT_ATM_RFC1483: /* * en atm driver requires 4-byte atm pseudo header. * though it isn't standard, vpi:vci needs to be * specified anyway. */ sockp->sa_family = AF_UNSPEC; hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */ break; #endif default: return (EIO); } len = uio->uio_resid; *datlen = len - hlen; if ((unsigned)len > MCLBYTES) return (EIO); MGETHDR(m, M_WAIT, MT_DATA); if (m == 0) return (ENOBUFS); if (len > MHLEN) { #if BSD >= 199103 MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { #else MCLGET(m); if (m->m_len != MCLBYTES) { #endif error = ENOBUFS; goto bad; } } m->m_pkthdr.len = m->m_len = len; m->m_pkthdr.rcvif = NULL; *mp = m; /* * Make room for link header. */ if (hlen != 0) { m->m_pkthdr.len -= hlen; m->m_len -= hlen; #if BSD >= 199103 m->m_data += hlen; /* XXX */ #else m->m_off += hlen; #endif error = UIOMOVE((caddr_t)sockp->sa_data, hlen, UIO_WRITE, uio); if (error) goto bad; } error = UIOMOVE(mtod(m, caddr_t), len - hlen, UIO_WRITE, uio); if (!error) return (0); bad: m_freem(m); return (error); } /* * Attach file to the bpf interface, i.e. make d listen on bp. * Must be called at splimp. */ static void bpf_attachd(d, bp) struct bpf_d *d; struct bpf_if *bp; { /* * Point d at bp, and add d to the interface's list of listeners. * Finally, point the driver's bpf cookie at the interface so * it will divert packets to bpf. */ d->bd_bif = bp; d->bd_next = bp->bif_dlist; bp->bif_dlist = d; bp->bif_ifp->if_bpf = bp; } /* * Detach a file from its interface. */ static void bpf_detachd(d) struct bpf_d *d; { struct bpf_d **p; struct bpf_if *bp; bp = d->bd_bif; /* * Check if this descriptor had requested promiscuous mode. * If so, turn it off. */ if (d->bd_promisc) { d->bd_promisc = 0; if (ifpromisc(bp->bif_ifp, 0)) /* * Something is really wrong if we were able to put * the driver into promiscuous mode, but can't * take it out. */ panic("bpf: ifpromisc failed"); } /* Remove d from the interface's descriptor list. */ p = &bp->bif_dlist; while (*p != d) { p = &(*p)->bd_next; if (*p == 0) panic("bpf_detachd: descriptor not in list"); } *p = (*p)->bd_next; if (bp->bif_dlist == 0) /* * Let the driver know that there are no more listeners. */ d->bd_bif->bif_ifp->if_bpf = 0; d->bd_bif = 0; } /* * Mark a descriptor free by making it point to itself. * This is probably cheaper than marking with a constant since * the address should be in a register anyway. */ #define D_ISFREE(d) ((d) == (d)->bd_next) #define D_MARKFREE(d) ((d)->bd_next = (d)) #define D_MARKUSED(d) ((d)->bd_next = 0) /* * Open ethernet device. Returns ENXIO for illegal minor device number, * EBUSY if file is open by another process. */ /* ARGSUSED */ static int bpfopen(dev, flags, fmt, p) dev_t dev; int flags; int fmt; struct proc *p; { register struct bpf_d *d; if (minor(dev) >= NBPFILTER) return (ENXIO); /* * Each minor can be opened by only one process. If the requested * minor is in use, return EBUSY. */ d = &bpf_dtab[minor(dev)]; if (!D_ISFREE(d)) return (EBUSY); /* Mark "free" and do most initialization. 
*/ bzero((char *)d, sizeof(*d)); d->bd_bufsize = bpf_bufsize; d->bd_sig = SIGIO; return (0); } /* * Close the descriptor by detaching it from its interface, * deallocating its buffers, and marking it free. */ /* ARGSUSED */ static int bpfclose(dev, flags, fmt, p) dev_t dev; int flags; int fmt; struct proc *p; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; register int s; + funsetown(d->bd_sigio); s = splimp(); if (d->bd_bif) bpf_detachd(d); splx(s); bpf_freed(d); return (0); } /* * Support for SunOS, which does not have tsleep. */ #if BSD < 199103 static bpf_timeout(arg) caddr_t arg; { struct bpf_d *d = (struct bpf_d *)arg; d->bd_timedout = 1; wakeup(arg); } #define BPF_SLEEP(chan, pri, s, t) bpf_sleep((struct bpf_d *)chan) int bpf_sleep(d) register struct bpf_d *d; { register int rto = d->bd_rtout; register int st; if (rto != 0) { d->bd_timedout = 0; timeout(bpf_timeout, (caddr_t)d, rto); } st = sleep((caddr_t)d, PRINET|PCATCH); if (rto != 0) { if (d->bd_timedout == 0) untimeout(bpf_timeout, (caddr_t)d); else if (st == 0) return EWOULDBLOCK; } return (st != 0) ? EINTR : 0; } #else #define BPF_SLEEP tsleep #endif /* * Rotate the packet buffers in descriptor d. Move the store buffer * into the hold slot, and the free buffer into the store slot. * Zero the length of the new store buffer. */ #define ROTATE_BUFFERS(d) \ (d)->bd_hbuf = (d)->bd_sbuf; \ (d)->bd_hlen = (d)->bd_slen; \ (d)->bd_sbuf = (d)->bd_fbuf; \ (d)->bd_slen = 0; \ (d)->bd_fbuf = 0; /* * bpfread - read next chunk of packets from buffers */ static int bpfread(dev, uio, ioflag) dev_t dev; register struct uio *uio; int ioflag; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; int error; int s; /* * Restrict application to use a buffer the same size as * as kernel buffers. */ if (uio->uio_resid != d->bd_bufsize) return (EINVAL); s = splimp(); /* * If the hold buffer is empty, then do a timed sleep, which * ends when the timeout expires or when enough packets * have arrived to fill the store buffer. */ while (d->bd_hbuf == 0) { if (d->bd_immediate && d->bd_slen != 0) { /* * A packet(s) either arrived since the previous * read or arrived while we were asleep. * Rotate the buffers and return what's here. */ ROTATE_BUFFERS(d); break; } if (ioflag & IO_NDELAY) error = EWOULDBLOCK; else error = BPF_SLEEP((caddr_t)d, PRINET|PCATCH, "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { splx(s); return (error); } if (error == EWOULDBLOCK) { /* * On a timeout, return what's in the buffer, * which may be nothing. If there is something * in the store buffer, we can rotate the buffers. */ if (d->bd_hbuf) /* * We filled up the buffer in between * getting the timeout and arriving * here, so we don't need to rotate. */ break; if (d->bd_slen == 0) { splx(s); return (0); } ROTATE_BUFFERS(d); break; } } /* * At this point, we know we have something in the hold slot. */ splx(s); /* * Move data from hold buffer into user space. * We know the entire buffer is transferred since * we checked above that the read buffer is bpf_bufsize bytes. */ error = UIOMOVE(d->bd_hbuf, d->bd_hlen, UIO_READ, uio); s = splimp(); d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = 0; d->bd_hlen = 0; splx(s); return (error); } /* * If there are processes sleeping on this descriptor, wake them up. 
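[Aside: bpfread() above hands back a whole hold buffer at a time and rejects reads that are not exactly bd_bufsize with EINVAL, so readers must size their buffer via BIOCGBLEN and then walk the BPF_WORDALIGN'd headers inside each read. A hedged userland sketch; the function and callback names are invented:]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * One read() returns zero or more packets, each prefixed by a
 * struct bpf_hdr and padded out to a BPF_WORDALIGN boundary.
 */
static int
drain_bpf(int fd, void (*deliver)(const u_char *, u_int))
{
	struct bpf_hdr *bh;
	u_int blen;
	ssize_t n;
	char *buf, *p;

	if (ioctl(fd, BIOCGBLEN, &blen) == -1)
		return (-1);
	if ((buf = malloc(blen)) == NULL)
		return (-1);
	while ((n = read(fd, buf, blen)) > 0) {
		for (p = buf; p < buf + n;
		    p += BPF_WORDALIGN(bh->bh_hdrlen + bh->bh_caplen)) {
			bh = (struct bpf_hdr *)p;
			deliver((u_char *)p + bh->bh_hdrlen, bh->bh_caplen);
		}
	}
	free(buf);
	return (n == 0 ? 0 : -1);
}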
*/ static inline void bpf_wakeup(d) register struct bpf_d *d; { struct proc *p; wakeup((caddr_t)d); - if (d->bd_async && d->bd_sig) - if (d->bd_pgid > 0) - gsignal (d->bd_pgid, d->bd_sig); - else if (p = pfind (-d->bd_pgid)) - psignal (p, d->bd_sig); + if (d->bd_async && d->bd_sig && d->bd_sigio) + pgsigio(d->bd_sigio, d->bd_sig, 0); #if BSD >= 199103 selwakeup(&d->bd_sel); /* XXX */ d->bd_sel.si_pid = 0; #else if (d->bd_selproc) { selwakeup(d->bd_selproc, (int)d->bd_selcoll); d->bd_selcoll = 0; d->bd_selproc = 0; } #endif } static int bpfwrite(dev, uio, ioflag) dev_t dev; struct uio *uio; int ioflag; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; struct ifnet *ifp; struct mbuf *m; int error, s; static struct sockaddr dst; int datlen; if (d->bd_bif == 0) return (ENXIO); ifp = d->bd_bif->bif_ifp; if (uio->uio_resid == 0) return (0); error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, &m, &dst, &datlen); if (error) return (error); if (datlen > ifp->if_mtu) return (EMSGSIZE); s = splnet(); #if BSD >= 199103 error = (*ifp->if_output)(ifp, m, &dst, (struct rtentry *)0); #else error = (*ifp->if_output)(ifp, m, &dst); #endif splx(s); /* * The driver frees the mbuf. */ return (error); } /* * Reset a descriptor by flushing its packet buffer and clearing the * receive and drop counts. Should be called at splimp. */ static void reset_d(d) struct bpf_d *d; { if (d->bd_hbuf) { /* Free the hold buffer. */ d->bd_fbuf = d->bd_hbuf; d->bd_hbuf = 0; } d->bd_slen = 0; d->bd_hlen = 0; d->bd_rcount = 0; d->bd_dcount = 0; } /* * FIONREAD Check for read packet available. * SIOCGIFADDR Get interface address - convenient hook to driver. * BIOCGBLEN Get buffer len [for read()]. * BIOCSETF Set ethernet read filter. * BIOCFLUSH Flush read packet buffer. * BIOCPROMISC Put interface into promiscuous mode. * BIOCGDLT Get link layer type. * BIOCGETIF Get interface name. * BIOCSETIF Set interface. * BIOCSRTIMEOUT Set read timeout. * BIOCGRTIMEOUT Get read timeout. * BIOCGSTATS Get packet stats. * BIOCIMMEDIATE Set immediate mode. * BIOCVERSION Get filter language version. */ /* ARGSUSED */ static int bpfioctl(dev, cmd, addr, flags, p) dev_t dev; u_long cmd; caddr_t addr; int flags; struct proc *p; { register struct bpf_d *d = &bpf_dtab[minor(dev)]; int s, error = 0; switch (cmd) { default: error = EINVAL; break; /* * Check for read packet available. */ case FIONREAD: { int n; s = splimp(); n = d->bd_slen; if (d->bd_hbuf) n += d->bd_hlen; splx(s); *(int *)addr = n; break; } case SIOCGIFADDR: { struct ifnet *ifp; if (d->bd_bif == 0) error = EINVAL; else { ifp = d->bd_bif->bif_ifp; error = (*ifp->if_ioctl)(ifp, cmd, addr); } break; } /* * Get buffer len [for read()]. */ case BIOCGBLEN: *(u_int *)addr = d->bd_bufsize; break; /* * Set buffer length. */ case BIOCSBLEN: #if BSD < 199103 error = EINVAL; #else if (d->bd_bif != 0) error = EINVAL; else { register u_int size = *(u_int *)addr; if (size > BPF_MAXBUFSIZE) *(u_int *)addr = size = BPF_MAXBUFSIZE; else if (size < BPF_MINBUFSIZE) *(u_int *)addr = size = BPF_MINBUFSIZE; d->bd_bufsize = size; } #endif break; /* * Set link layer read filter. */ case BIOCSETF: error = bpf_setf(d, (struct bpf_program *)addr); break; /* * Flush read packet buffer. */ case BIOCFLUSH: s = splimp(); reset_d(d); splx(s); break; /* * Put interface into promiscuous mode. */ case BIOCPROMISC: if (d->bd_bif == 0) { /* * No interface attached yet. 
*/ error = EINVAL; break; } s = splimp(); if (d->bd_promisc == 0) { error = ifpromisc(d->bd_bif->bif_ifp, 1); if (error == 0) d->bd_promisc = 1; } splx(s); break; /* * Get device parameters. */ case BIOCGDLT: if (d->bd_bif == 0) error = EINVAL; else *(u_int *)addr = d->bd_bif->bif_dlt; break; /* * Set interface name. */ case BIOCGETIF: if (d->bd_bif == 0) error = EINVAL; else bpf_ifname(d->bd_bif->bif_ifp, (struct ifreq *)addr); break; /* * Set interface. */ case BIOCSETIF: error = bpf_setif(d, (struct ifreq *)addr); break; /* * Set read timeout. */ case BIOCSRTIMEOUT: { struct timeval *tv = (struct timeval *)addr; /* * Subtract 1 tick from tvtohz() since this isn't * a one-shot timer. */ if ((error = itimerfix(tv)) == 0) d->bd_rtout = tvtohz(tv) - 1; break; } /* * Get read timeout. */ case BIOCGRTIMEOUT: { struct timeval *tv = (struct timeval *)addr; tv->tv_sec = d->bd_rtout / hz; tv->tv_usec = (d->bd_rtout % hz) * tick; break; } /* * Get packet stats. */ case BIOCGSTATS: { struct bpf_stat *bs = (struct bpf_stat *)addr; bs->bs_recv = d->bd_rcount; bs->bs_drop = d->bd_dcount; break; } /* * Set immediate mode. */ case BIOCIMMEDIATE: d->bd_immediate = *(u_int *)addr; break; case BIOCVERSION: { struct bpf_version *bv = (struct bpf_version *)addr; bv->bv_major = BPF_MAJOR_VERSION; bv->bv_minor = BPF_MINOR_VERSION; break; } case FIONBIO: /* Non-blocking I/O */ break; case FIOASYNC: /* Send signal on receive packets */ d->bd_async = *(int *)addr; break; -/* N.B. ioctl (FIOSETOWN) and fcntl (F_SETOWN) both end up doing the - equivalent of a TIOCSPGRP and hence end up here. *However* TIOCSPGRP's arg - is a process group if it's positive and a process id if it's negative. This - is exactly the opposite of what the other two functions want! Therefore - there is code in ioctl and fcntl to negate the arg before calling here. */ + case FIOSETOWN: + error = fsetown(*(int *)addr, &d->bd_sigio); + break; - case TIOCSPGRP: /* Process or group to send signals to */ - d->bd_pgid = *(int *)addr; + case FIOGETOWN: + *(int *)addr = fgetown(d->bd_sigio); break; + /* This is deprecated, FIOSETOWN should be used instead. */ + case TIOCSPGRP: + error = fsetown(-(*(int *)addr), &d->bd_sigio); + break; + + /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: - *(int *)addr = d->bd_pgid; + *(int *)addr = -fgetown(d->bd_sigio); break; case BIOCSRSIG: /* Set receive signal */ { u_int sig; sig = *(u_int *)addr; if (sig >= NSIG) error = EINVAL; else d->bd_sig = sig; break; } case BIOCGRSIG: *(u_int *)addr = d->bd_sig; break; } return (error); } /* * Set d's packet filter program to fp. If this file already has a filter, * free it and replace it. Returns EINVAL for bogus requests. 
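[Aside: the replaced hunk above centralizes the ownership sign convention in fsetown(): FIOSETOWN/F_SETOWN take a pid positive and a process group negative, while the deprecated TIOCSPGRP took the group positive — hence the explicit negation the driver now performs. A sketch of the two equivalent spellings, helper name invented:]

#include <sys/types.h>
#include <sys/ioctl.h>

/*
 * FIOSETOWN: negative arg means process group (pgrp 123 -> -123).
 * Deprecated equivalent: arg = pgrp; ioctl(fd, TIOCSPGRP, &arg);
 */
static int
own_bpf_pgrp(int fd, pid_t pgrp)
{
	int arg = -pgrp;

	return (ioctl(fd, FIOSETOWN, &arg));
}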
*/ static int bpf_setf(d, fp) struct bpf_d *d; struct bpf_program *fp; { struct bpf_insn *fcode, *old; u_int flen, size; int s; old = d->bd_filter; if (fp->bf_insns == 0) { if (fp->bf_len != 0) return (EINVAL); s = splimp(); d->bd_filter = 0; reset_d(d); splx(s); if (old != 0) free((caddr_t)old, M_DEVBUF); return (0); } flen = fp->bf_len; if (flen > BPF_MAXINSNS) return (EINVAL); size = flen * sizeof(*fp->bf_insns); fcode = (struct bpf_insn *)malloc(size, M_DEVBUF, M_WAITOK); if (copyin((caddr_t)fp->bf_insns, (caddr_t)fcode, size) == 0 && bpf_validate(fcode, (int)flen)) { s = splimp(); d->bd_filter = fcode; reset_d(d); splx(s); if (old != 0) free((caddr_t)old, M_DEVBUF); return (0); } free((caddr_t)fcode, M_DEVBUF); return (EINVAL); } /* * Detach a file from its current interface (if attached at all) and attach * to the interface indicated by the name stored in ifr. * Return an errno or 0. */ static int bpf_setif(d, ifr) struct bpf_d *d; struct ifreq *ifr; { struct bpf_if *bp; int s, error; struct ifnet *theywant; theywant = ifunit(ifr->ifr_name); if (theywant == 0) return ENXIO; /* * Look through attached interfaces for the named one. */ for (bp = bpf_iflist; bp != 0; bp = bp->bif_next) { struct ifnet *ifp = bp->bif_ifp; if (ifp == 0 || ifp != theywant) continue; /* * We found the requested interface. * If it's not up, return an error. * Allocate the packet buffers if we need to. * If we're already attached to requested interface, * just flush the buffer. */ if ((ifp->if_flags & IFF_UP) == 0) return (ENETDOWN); if (d->bd_sbuf == 0) { error = bpf_allocbufs(d); if (error != 0) return (error); } s = splimp(); if (bp != d->bd_bif) { if (d->bd_bif) /* * Detach if attached to something else. */ bpf_detachd(d); bpf_attachd(d, bp); } reset_d(d); splx(s); return (0); } /* Not found. */ return (ENXIO); } /* * Convert an interface name plus unit number of an ifp to a single * name which is returned in the ifr. */ static void bpf_ifname(ifp, ifr) struct ifnet *ifp; struct ifreq *ifr; { char *s = ifp->if_name; char *d = ifr->ifr_name; while (*d++ = *s++) continue; d--; /* back to the null */ /* XXX Assume that unit number is less than 10. */ *d++ = ifp->if_unit + '0'; *d = '\0'; } /* * Support for select() and poll() system calls * * Return true iff the specific operation will not block indefinitely. * Otherwise, return false but make a note that a selwakeup() must be done. */ int bpfpoll(dev, events, p) register dev_t dev; int events; struct proc *p; { register struct bpf_d *d; register int s; int revents = 0; /* * An imitation of the FIONREAD ioctl code. */ d = &bpf_dtab[minor(dev)]; s = splimp(); if (events & (POLLIN | POLLRDNORM)) if (d->bd_hlen != 0 || (d->bd_immediate && d->bd_slen != 0)) revents |= events & (POLLIN | POLLRDNORM); else selrecord(p, &d->bd_sel); splx(s); return (revents); } /* * Incoming linkage from device drivers. Process the packet pkt, of length * pktlen, which is stored in a contiguous buffer. The packet is parsed * by each process' filter, and if accepted, stashed into the corresponding * buffer. */ void bpf_tap(ifp, pkt, pktlen) struct ifnet *ifp; register u_char *pkt; register u_int pktlen; { struct bpf_if *bp; register struct bpf_d *d; register u_int slen; /* * Note that the ipl does not have to be raised at this point. * The only problem that could arise here is that if two different * interfaces shared any data. This is not the case. 
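[Aside: bpf_setf() above copies the instruction array in and bpf_validate()s it before swapping filters. The smallest valid program is a single BPF_RET statement, which accepts every packet; a hedged userland sketch of installing it, helper name invented:]

#include <sys/types.h>
#include <sys/ioctl.h>
#include <net/bpf.h>

/*
 * A one-instruction accept-all filter: BPF_RET with k = (u_int)-1
 * captures every packet in full (catchpacket() clamps to snaplen
 * vs. pktlen anyway).  bf_len counts instructions, not bytes.
 */
static int
accept_all(int fd)
{
	static struct bpf_insn ins[] = {
		BPF_STMT(BPF_RET | BPF_K, (u_int)-1),
	};
	struct bpf_program prog;

	prog.bf_len = 1;
	prog.bf_insns = ins;
	return (ioctl(fd, BIOCSETF, &prog));	/* lands in bpf_setf() */
}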
*/ bp = ifp->if_bpf; for (d = bp->bif_dlist; d != 0; d = d->bd_next) { ++d->bd_rcount; slen = bpf_filter(d->bd_filter, pkt, pktlen, pktlen); if (slen != 0) catchpacket(d, pkt, pktlen, slen, bcopy); } } /* * Copy data from an mbuf chain into a buffer. This code is derived * from m_copydata in sys/uipc_mbuf.c. */ static void bpf_mcopy(src_arg, dst_arg, len) const void *src_arg; void *dst_arg; register size_t len; { register const struct mbuf *m; register u_int count; u_char *dst; m = src_arg; dst = dst_arg; while (len > 0) { if (m == 0) panic("bpf_mcopy"); count = min(m->m_len, len); bcopy(mtod(m, void *), dst, count); m = m->m_next; dst += count; len -= count; } } /* * Incoming linkage from device drivers, when packet is in an mbuf chain. */ void bpf_mtap(ifp, m) struct ifnet *ifp; struct mbuf *m; { struct bpf_if *bp = ifp->if_bpf; struct bpf_d *d; u_int pktlen, slen; struct mbuf *m0; pktlen = 0; for (m0 = m; m0 != 0; m0 = m0->m_next) pktlen += m0->m_len; for (d = bp->bif_dlist; d != 0; d = d->bd_next) { ++d->bd_rcount; slen = bpf_filter(d->bd_filter, (u_char *)m, pktlen, 0); if (slen != 0) catchpacket(d, (u_char *)m, pktlen, slen, bpf_mcopy); } } /* * Move the packet data from interface memory (pkt) into the * store buffer, waking up any pending reads if a buffer fills. * "copy" is the routine called to do the actual data * transfer. bcopy is passed in to copy contiguous chunks, while * bpf_mcopy is passed in to copy mbuf chains. In the latter case, * pkt is really an mbuf. */ static void catchpacket(d, pkt, pktlen, snaplen, cpfn) register struct bpf_d *d; register u_char *pkt; register u_int pktlen, snaplen; register void (*cpfn) __P((const void *, void *, size_t)); { register struct bpf_hdr *hp; register int totlen, curlen; register int hdrlen = d->bd_bif->bif_hdrlen; /* * Figure out how many bytes to move. If the packet is * greater than or equal to the snapshot length, transfer that * much. Otherwise, transfer the whole packet (unless * we hit the buffer size limit). */ totlen = hdrlen + min(snaplen, pktlen); if (totlen > d->bd_bufsize) totlen = d->bd_bufsize; /* * Round up the end of the previous packet to the next longword. */ curlen = BPF_WORDALIGN(d->bd_slen); if (curlen + totlen > d->bd_bufsize) { /* * This packet will overflow the storage buffer. * Rotate the buffers if we can, then wake up any * pending reads. */ if (d->bd_fbuf == 0) { /* * We haven't completed the previous read yet, * so drop the packet. */ ++d->bd_dcount; return; } ROTATE_BUFFERS(d); bpf_wakeup(d); curlen = 0; } else if (d->bd_immediate) /* * Immediate mode is set. A packet arrived so any * reads should be woken up. */ bpf_wakeup(d); /* * Append the bpf header. */ hp = (struct bpf_hdr *)(d->bd_sbuf + curlen); #if BSD >= 199103 microtime(&hp->bh_tstamp); #elif defined(sun) uniqtime(&hp->bh_tstamp); #else hp->bh_tstamp = time; #endif hp->bh_datalen = pktlen; hp->bh_hdrlen = hdrlen; /* * Copy the packet data into the store buffer and update its length. */ (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen)); d->bd_slen = curlen + totlen; } /* * Initialize all nonzero fields of a descriptor.
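
The store/hold/free rotation in catchpacket() is easiest to see in isolation. A toy userland model of the per-packet decision (not kernel code; the buffers are reduced to tags and the wakeup is elided):

	#include <stdio.h>
	#include <stddef.h>

	struct toy_d {
		const char *sbuf, *hbuf, *fbuf;	/* store, hold, free slots */
		int slen;			/* bytes in the store buffer */
		int bufsize;
		int dcount;			/* dropped packets */
	};

	static void
	toy_catch(struct toy_d *d, int totlen)
	{
		if (d->slen + totlen > d->bufsize) {
			if (d->fbuf == NULL) {
				/* Reader still owns the hold buffer: drop. */
				d->dcount++;
				return;
			}
			/* ROTATE_BUFFERS: store -> hold, free -> store. */
			d->hbuf = d->sbuf;
			d->sbuf = d->fbuf;
			d->fbuf = NULL;	/* returned when the read completes */
			d->slen = 0;
			/* bpf_wakeup(d) would be posted here. */
		}
		d->slen += totlen;
	}

	int
	main(void)
	{
		struct toy_d d = { "A", NULL, "B", 0, 100, 0 };
		int i;

		for (i = 0; i < 4; i++)
			toy_catch(&d, 60);	/* 2nd packet rotates, later ones drop */
		printf("store=%s hold=%s drops=%d\n", d.sbuf, d.hbuf, d.dcount);
		return (0);
	}

The real code additionally longword-aligns the start of each packet with BPF_WORDALIGN() and, in immediate mode, wakes readers on every arrival rather than only on rotation.
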
*/ static int bpf_allocbufs(d) register struct bpf_d *d; { d->bd_fbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); if (d->bd_fbuf == 0) return (ENOBUFS); d->bd_sbuf = (caddr_t)malloc(d->bd_bufsize, M_DEVBUF, M_WAITOK); if (d->bd_sbuf == 0) { free(d->bd_fbuf, M_DEVBUF); return (ENOBUFS); } d->bd_slen = 0; d->bd_hlen = 0; return (0); } /* * Free buffers currently in use by a descriptor. * Called on close. */ static void bpf_freed(d) register struct bpf_d *d; { /* * We don't need to lock out interrupts since this descriptor has * been detached from its interface and it yet hasn't been marked * free. */ if (d->bd_sbuf != 0) { free(d->bd_sbuf, M_DEVBUF); if (d->bd_hbuf != 0) free(d->bd_hbuf, M_DEVBUF); if (d->bd_fbuf != 0) free(d->bd_fbuf, M_DEVBUF); } if (d->bd_filter) free((caddr_t)d->bd_filter, M_DEVBUF); D_MARKFREE(d); } /* * Attach an interface to bpf. driverp is a pointer to a (struct bpf_if *) * in the driver's softc; dlt is the link layer type; hdrlen is the fixed * size of the link header (variable length headers not yet supported). */ void bpfattach(ifp, dlt, hdrlen) struct ifnet *ifp; u_int dlt, hdrlen; { struct bpf_if *bp; int i; bp = (struct bpf_if *)malloc(sizeof(*bp), M_DEVBUF, M_DONTWAIT); if (bp == 0) panic("bpfattach"); bp->bif_dlist = 0; bp->bif_ifp = ifp; bp->bif_dlt = dlt; bp->bif_next = bpf_iflist; bpf_iflist = bp; bp->bif_ifp->if_bpf = 0; /* * Compute the length of the bpf header. This is not necessarily * equal to SIZEOF_BPF_HDR because we want to insert spacing such * that the network layer header begins on a longword boundary (for * performance reasons and to alleviate alignment restrictions). */ bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; /* * Mark all the descriptors free if this hasn't been done. */ if (!bpf_dtab_init) { for (i = 0; i < NBPFILTER; ++i) D_MARKFREE(&bpf_dtab[i]); bpf_dtab_init = 1; } if (bootverbose) printf("bpf: %s%d attached\n", ifp->if_name, ifp->if_unit); } #ifdef DEVFS static void *bpf_devfs_token[NBPFILTER]; #endif static int bpf_devsw_installed; static void bpf_drvinit __P((void *unused)); static void bpf_drvinit(unused) void *unused; { dev_t dev; #ifdef DEVFS int i; #endif if( ! bpf_devsw_installed ) { dev = makedev(CDEV_MAJOR, 0); cdevsw_add(&dev,&bpf_cdevsw, NULL); bpf_devsw_installed = 1; #ifdef DEVFS for ( i = 0 ; i < NBPFILTER ; i++ ) { bpf_devfs_token[i] = devfs_add_devswf(&bpf_cdevsw, i, DV_CHR, 0, 0, 0600, "bpf%d", i); } #endif } } SYSINIT(bpfdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,bpf_drvinit,NULL) #endif Index: head/sys/net/bpfdesc.h =================================================================== --- head/sys/net/bpfdesc.h (revision 41085) +++ head/sys/net/bpfdesc.h (revision 41086) @@ -1,103 +1,103 @@ /* * Copyright (c) 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from the Stanford/CMU enet packet filter, * (net/enet.c) distributed as part of 4.3BSD, and code contributed * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence * Berkeley Laboratory. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)bpfdesc.h 8.1 (Berkeley) 6/10/93 * - * $Id$ + * $Id: bpfdesc.h,v 1.10 1997/02/22 09:40:57 peter Exp $ */ #ifndef _NET_BPFDESC_H_ #define _NET_BPFDESC_H_ #include /* * Descriptor associated with each open bpf file. */ struct bpf_d { struct bpf_d *bd_next; /* Linked list of descriptors */ /* * Buffer slots: two mbuf clusters buffer the incoming packets. * The model has three slots. Sbuf is always occupied. * sbuf (store) - Receive interrupt puts packets here. * hbuf (hold) - When sbuf is full, put cluster here and * wakeup read (replace sbuf with fbuf). * fbuf (free) - When read is done, put cluster here. * On receiving, if sbuf is full and fbuf is 0, packet is dropped. */ caddr_t bd_sbuf; /* store slot */ caddr_t bd_hbuf; /* hold slot */ caddr_t bd_fbuf; /* free slot */ int bd_slen; /* current length of store buffer */ int bd_hlen; /* current length of hold buffer */ int bd_bufsize; /* absolute length of buffers */ struct bpf_if * bd_bif; /* interface descriptor */ u_long bd_rtout; /* Read timeout in 'ticks' */ struct bpf_insn *bd_filter; /* filter code */ u_long bd_rcount; /* number of packets received */ u_long bd_dcount; /* number of packets dropped */ u_char bd_promisc; /* true if listening promiscuously */ u_char bd_state; /* idle, waiting, or timed out */ u_char bd_immediate; /* true to return on packet arrival */ int bd_async; /* non-zero if packet reception should generate signal */ int bd_sig; /* signal to send upon packet reception */ - pid_t bd_pgid; /* process or group id for signal */ + struct sigio * bd_sigio; /* information for SIGIO */ #if BSD < 199103 u_char bd_selcoll; /* true if selects collide */ int bd_timedout; struct proc * bd_selproc; /* process that last selected us */ #else u_char bd_pad; /* explicit alignment */ struct selinfo bd_sel; /* bsd select info */ #endif }; /* * Descriptor associated with each attached hardware interface. 
*/ struct bpf_if { struct bpf_if *bif_next; /* list of all interfaces */ struct bpf_d *bif_dlist; /* descriptor list */ u_int bif_dlt; /* link layer type */ u_int bif_hdrlen; /* length of header (with padding) */ struct ifnet *bif_ifp; /* corresponding interface */ }; #endif Index: head/sys/net/if_tun.c =================================================================== --- head/sys/net/if_tun.c (revision 41085) +++ head/sys/net/if_tun.c (revision 41086) @@ -1,638 +1,645 @@ /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ /* * Copyright (c) 1988, Julian Onions * Nottingham University 1987. * * This source may be freely distributed, however I would be interested * in any changes that are made. * * This driver takes packets off the IP i/f and hands them up to a * user process to have its wicked way with. This driver has its * roots in a similar driver written by Phil Cockcroft (formerly) at * UCL. This driver is based much more on read/write/poll mode of * operation though. */ #include "tun.h" #if NTUN > 0 #include "opt_devfs.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include +#include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ #include #include /* * XXX stop from including . doesn't * exist if we are an LKM. */ #undef KERNEL #include #define KERNEL #include #include #include #ifdef INET #include #include #endif #ifdef NS #include #include #endif #include "bpfilter.h" #if NBPFILTER > 0 #include #endif #include #include static void tunattach __P((void *)); PSEUDO_SET(tunattach, if_tun); #define TUNDEBUG if (tundebug) printf static int tundebug = 0; SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); static struct tun_softc tunctl[NTUN]; static int tunoutput __P((struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *rt)); static int tunifioctl __P((struct ifnet *, u_long, caddr_t)); static int tuninit __P((int)); static d_open_t tunopen; static d_close_t tunclose; static d_read_t tunread; static d_write_t tunwrite; static d_ioctl_t tunioctl; static d_poll_t tunpoll; #define CDEV_MAJOR 52 static struct cdevsw tun_cdevsw = { tunopen, tunclose, tunread, tunwrite, tunioctl, nullstop, noreset, nodevtotty, tunpoll, nommap, nostrategy, "tun", NULL, -1 }; static int tun_devsw_installed; #ifdef DEVFS static void *tun_devfs_token[NTUN]; #endif #define minor_val(n) ((((n) & ~0xff) << 8) | ((n) & 0xff)) #define dev_val(n) (((n) >> 8) | ((n) & 0xff)) static void tunattach(dummy) void *dummy; { register int i; struct ifnet *ifp; dev_t dev; if ( tun_devsw_installed ) return; dev = makedev(CDEV_MAJOR, 0); cdevsw_add(&dev, &tun_cdevsw, NULL); tun_devsw_installed = 1; for ( i = 0; i < NTUN; i++ ) { #ifdef DEVFS tun_devfs_token[i] = devfs_add_devswf(&tun_cdevsw, minor_val(i), DV_CHR, UID_UUCP, GID_DIALER, 0600, "tun%d", i); #endif tunctl[i].tun_flags = TUN_INITED; ifp = &tunctl[i].tun_if; ifp->if_unit = i; ifp->if_name = "tun"; ifp->if_mtu = TUNMTU; ifp->if_ioctl = tunifioctl; ifp->if_output = tunoutput; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_snd.ifq_maxlen = ifqmaxlen; if_attach(ifp); #if NBPFILTER > 0 bpfattach(ifp, DLT_NULL, sizeof(u_int)); #endif } } /* * tunnel open - must be superuser & the device must be * configured in */ static int tunopen(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { struct ifnet *ifp; struct tun_softc *tp; register int unit, error; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); if ((unit =
dev_val(minor(dev))) >= NTUN) return (ENXIO); tp = &tunctl[unit]; if (tp->tun_flags & TUN_OPEN) return EBUSY; ifp = &tp->tun_if; tp->tun_flags |= TUN_OPEN; TUNDEBUG("%s%d: open\n", ifp->if_name, ifp->if_unit); return (0); } /* * tunclose - close the device - mark i/f down & delete * routing info */ static int tunclose(dev, foo, bar, p) dev_t dev; int foo; int bar; struct proc *p; { register int unit = dev_val(minor(dev)), s; struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; struct mbuf *m; tp->tun_flags &= ~TUN_OPEN; /* * junk all pending output */ do { s = splimp(); IF_DEQUEUE(&ifp->if_snd, m); splx(s); if (m) m_freem(m); } while (m); if (ifp->if_flags & IFF_UP) { s = splimp(); if_down(ifp); if (ifp->if_flags & IFF_RUNNING) { /* find internet addresses and delete routes */ register struct ifaddr *ifa; for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { if (ifa->ifa_addr->sa_family == AF_INET) { rtinit(ifa, (int)RTM_DELETE, tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); } } } splx(s); } - tp->tun_pgrp = 0; + funsetown(tp->tun_sigio); selwakeup(&tp->tun_rsel); TUNDEBUG ("%s%d: closed\n", ifp->if_name, ifp->if_unit); return (0); } static int tuninit(unit) int unit; { struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; register struct ifaddr *ifa; TUNDEBUG("%s%d: tuninit\n", ifp->if_name, ifp->if_unit); ifp->if_flags |= IFF_UP | IFF_RUNNING; getmicrotime(&ifp->if_lastchange); for (ifa = ifp->if_addrhead.tqh_first; ifa; ifa = ifa->ifa_link.tqe_next) { #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { struct sockaddr_in *si; si = (struct sockaddr_in *)ifa->ifa_addr; if (si && si->sin_addr.s_addr) tp->tun_flags |= TUN_IASET; si = (struct sockaddr_in *)ifa->ifa_dstaddr; if (si && si->sin_addr.s_addr) tp->tun_flags |= TUN_DSTADDR; } #endif } return 0; } /* * Process an ioctl request. */ int tunifioctl(ifp, cmd, data) struct ifnet *ifp; u_long cmd; caddr_t data; { register struct ifreq *ifr = (struct ifreq *)data; int error = 0, s; s = splimp(); switch(cmd) { case SIOCSIFADDR: tuninit(ifp->if_unit); TUNDEBUG("%s%d: address set\n", ifp->if_name, ifp->if_unit); break; case SIOCSIFDSTADDR: tuninit(ifp->if_unit); TUNDEBUG("%s%d: destination address set\n", ifp->if_name, ifp->if_unit); break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; TUNDEBUG("%s%d: mtu set\n", ifp->if_name, ifp->if_unit); break; case SIOCADDMULTI: case SIOCDELMULTI: break; default: error = EINVAL; } splx(s); return (error); } /* * tunoutput - queue packets from higher level ready to put out. */ int tunoutput(ifp, m0, dst, rt) struct ifnet *ifp; struct mbuf *m0; struct sockaddr *dst; struct rtentry *rt; { struct tun_softc *tp = &tunctl[ifp->if_unit]; struct proc *p; int s; TUNDEBUG ("%s%d: tunoutput\n", ifp->if_name, ifp->if_unit); if ((tp->tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, ifp->if_unit, tp->tun_flags); m_freem (m0); return EHOSTDOWN; } #if NBPFILTER > 0 /* BPF write needs to be handled specially */ if (dst->sa_family == AF_UNSPEC) { dst->sa_family = *(mtod(m0, int *)); m0->m_len -= sizeof(int); m0->m_pkthdr.len -= sizeof(int); m0->m_data += sizeof(int); } if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer to it). 
*/ struct mbuf m; u_int af = dst->sa_family; m.m_next = m0; m.m_len = 4; m.m_data = (char *)&af; bpf_mtap(ifp, &m); } #endif switch(dst->sa_family) { #ifdef INET case AF_INET: s = splimp(); if (IF_QFULL(&ifp->if_snd)) { IF_DROP(&ifp->if_snd); m_freem(m0); splx(s); ifp->if_collisions++; return (ENOBUFS); } ifp->if_obytes += m0->m_pkthdr.len; IF_ENQUEUE(&ifp->if_snd, m0); splx(s); ifp->if_opackets++; break; #endif default: m_freem(m0); return EAFNOSUPPORT; } if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup((caddr_t)tp); } - if (tp->tun_flags & TUN_ASYNC && tp->tun_pgrp) { - if (tp->tun_pgrp > 0) - gsignal(tp->tun_pgrp, SIGIO); - else if ((p = pfind(-tp->tun_pgrp)) != 0) - psignal(p, SIGIO); - } + if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) + pgsigio(tp->tun_sigio, SIGIO, 0); selwakeup(&tp->tun_rsel); return 0; } /* * the cdevsw interface is now pretty minimal. */ static int tunioctl(dev, cmd, data, flag, p) dev_t dev; u_long cmd; caddr_t data; int flag; struct proc *p; { int unit = dev_val(minor(dev)), s; struct tun_softc *tp = &tunctl[unit]; struct tuninfo *tunp; switch (cmd) { case TUNSIFINFO: tunp = (struct tuninfo *)data; tp->tun_if.if_mtu = tunp->mtu; tp->tun_if.if_type = tunp->type; tp->tun_if.if_baudrate = tunp->baudrate; break; case TUNGIFINFO: tunp = (struct tuninfo *)data; tunp->mtu = tp->tun_if.if_mtu; tunp->type = tp->tun_if.if_type; tunp->baudrate = tp->tun_if.if_baudrate; break; case TUNSDEBUG: tundebug = *(int *)data; break; case TUNGDEBUG: *(int *)data = tundebug; break; case FIONBIO: break; case FIOASYNC: if (*(int *)data) tp->tun_flags |= TUN_ASYNC; else tp->tun_flags &= ~TUN_ASYNC; break; case FIONREAD: s = splimp(); if (tp->tun_if.if_snd.ifq_head) { struct mbuf *mb = tp->tun_if.if_snd.ifq_head; for( *(int *)data = 0; mb != 0; mb = mb->m_next) *(int *)data += mb->m_len; } else *(int *)data = 0; splx(s); break; + case FIOSETOWN: + return (fsetown(*(int *)data, &tp->tun_sigio)); + + case FIOGETOWN: + *(int *)data = fgetown(tp->tun_sigio); + return (0); + + /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: - tp->tun_pgrp = *(int *)data; - break; + return (fsetown(-(*(int *)data), &tp->tun_sigio)); + + /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: - *(int *)data = tp->tun_pgrp; - break; + *(int *)data = -fgetown(tp->tun_sigio); + return (0); + default: return (ENOTTY); } return (0); } /* * The cdevsw read interface - reads a packet at a time, or at * least as much of a packet as can be read.
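
With tun_pgrp gone, the tun driver follows the same pattern as bpf: fsetown()/fgetown() behind FIOSETOWN/FIOGETOWN, negation for the deprecated TIOC pair, and a single pgsigio() call in tunoutput() instead of the old hand-rolled gsignal()/pfind()/psignal() dance. A minimal async-reader sketch (the /dev/tun0 path is illustrative, and the interface must already be up and addressed for packets to arrive):

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/filio.h>
	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static int tunfd;

	static void
	sigio_handler(int sig)
	{
		char pkt[2048];
		long n;

		/* Each read returns at most one queued packet. */
		n = read(tunfd, pkt, sizeof(pkt));
		if (n > 0)
			write(STDOUT_FILENO, "packet\n", 7);
	}

	int
	main(void)
	{
		int on = 1, owner;

		tunfd = open("/dev/tun0", O_RDWR);	/* illustrative path */
		if (tunfd < 0) {
			perror("open");
			return (1);
		}
		signal(SIGIO, sigio_handler);

		owner = getpid();
		if (ioctl(tunfd, FIOSETOWN, &owner) < 0)
			perror("FIOSETOWN");
		if (ioctl(tunfd, FIOASYNC, &on) < 0)	/* sets TUN_ASYNC */
			perror("FIOASYNC");

		for (;;)
			pause();	/* tunoutput() raises SIGIO via pgsigio() */
		/* NOTREACHED */
	}

Old clients that still issue ioctl(fd, TIOCSPGRP, &pgrp) keep working because the handler negates the argument before calling fsetown(), so a positive TIOCSPGRP value still names a process group.
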
*/ static int tunread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int unit = dev_val(minor(dev)); struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; struct mbuf *m, *m0; int error=0, len, s; TUNDEBUG ("%s%d: read\n", ifp->if_name, ifp->if_unit); if ((tp->tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG ("%s%d: not ready 0%o\n", ifp->if_name, ifp->if_unit, tp->tun_flags); return EHOSTDOWN; } tp->tun_flags &= ~TUN_RWAIT; s = splimp(); do { IF_DEQUEUE(&ifp->if_snd, m0); if (m0 == 0) { if (flag & IO_NDELAY) { splx(s); return EWOULDBLOCK; } tp->tun_flags |= TUN_RWAIT; if( error = tsleep((caddr_t)tp, PCATCH | (PZERO + 1), "tunread", 0)) { splx(s); return error; } } } while (m0 == 0); splx(s); while (m0 && uio->uio_resid > 0 && error == 0) { len = min(uio->uio_resid, m0->m_len); if (len == 0) break; error = uiomove(mtod(m0, caddr_t), len, uio); MFREE(m0, m); m0 = m; } if (m0) { TUNDEBUG("Dropping mbuf\n"); m_freem(m0); } return error; } /* * the cdevsw write interface - an atomic write is a packet - or else! */ static int tunwrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { int unit = dev_val(minor(dev)); struct ifnet *ifp = &tunctl[unit].tun_if; struct mbuf *top, **mp, *m; int error=0, s, tlen, mlen; TUNDEBUG("%s%d: tunwrite\n", ifp->if_name, ifp->if_unit); if (uio->uio_resid < 0 || uio->uio_resid > TUNMRU) { TUNDEBUG("%s%d: len=%d!\n", ifp->if_name, ifp->if_unit, uio->uio_resid); return EIO; } tlen = uio->uio_resid; /* get a header mbuf */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return ENOBUFS; mlen = MHLEN; top = 0; mp = &top; while (error == 0 && uio->uio_resid > 0) { m->m_len = min(mlen, uio->uio_resid); error = uiomove(mtod (m, caddr_t), m->m_len, uio); *mp = m; mp = &m->m_next; if (uio->uio_resid > 0) { MGET (m, M_DONTWAIT, MT_DATA); if (m == 0) { error = ENOBUFS; break; } mlen = MLEN; } } if (error) { if (top) m_freem (top); return error; } top->m_pkthdr.len = tlen; top->m_pkthdr.rcvif = ifp; #if NBPFILTER > 0 if (ifp->if_bpf) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer to it). */ struct mbuf m; u_int af = AF_INET; m.m_next = top; m.m_len = 4; m.m_data = (char *)&af; bpf_mtap(ifp, &m); } #endif #ifdef INET s = splimp(); if (IF_QFULL (&ipintrq)) { IF_DROP(&ipintrq); splx(s); ifp->if_collisions++; m_freem(top); return ENOBUFS; } IF_ENQUEUE(&ipintrq, top); splx(s); ifp->if_ibytes += tlen; ifp->if_ipackets++; schednetisr(NETISR_IP); #endif return error; } /* * tunpoll - the poll interface; this is only really useful on reads. * The write detect always returns true; write never blocks * anyway, it either accepts the packet or drops it.
*/ static int tunpoll(dev, events, p) dev_t dev; int events; struct proc *p; { int unit = dev_val(minor(dev)), s; struct tun_softc *tp = &tunctl[unit]; struct ifnet *ifp = &tp->tun_if; int revents = 0; s = splimp(); TUNDEBUG("%s%d: tunpoll\n", ifp->if_name, ifp->if_unit); if (events & (POLLIN | POLLRDNORM)) if (ifp->if_snd.ifq_len > 0) { TUNDEBUG("%s%d: tunpoll q=%d\n", ifp->if_name, ifp->if_unit, ifp->if_snd.ifq_len); revents |= events & (POLLIN | POLLRDNORM); } else { TUNDEBUG("%s%d: tunpoll waiting\n", ifp->if_name, ifp->if_unit); selrecord(p, &tp->tun_rsel); } if (events & (POLLOUT | POLLWRNORM)) revents |= events & (POLLOUT | POLLWRNORM); splx(s); return (revents); } #endif /* NTUN */ Index: head/sys/net/if_tunvar.h =================================================================== --- head/sys/net/if_tunvar.h (revision 41085) +++ head/sys/net/if_tunvar.h (revision 41086) @@ -1,50 +1,50 @@ /*- * Copyright (c) 1998 Brian Somers * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id:$ + * $Id: if_tunvar.h,v 1.1 1998/01/11 17:52:33 brian Exp $ */ #ifndef _NET_IF_TUNVAR_H_ #define _NET_IF_TUNVAR_H_ struct tun_softc { u_short tun_flags; /* misc flags */ #define TUN_OPEN 0x0001 #define TUN_INITED 0x0002 #define TUN_RCOLL 0x0004 #define TUN_IASET 0x0008 #define TUN_DSTADDR 0x0010 #define TUN_RWAIT 0x0040 #define TUN_ASYNC 0x0080 #define TUN_READY (TUN_OPEN | TUN_INITED) struct ifnet tun_if; /* the interface */ - int tun_pgrp; /* the process group - if any */ + struct sigio *tun_sigio; /* information for SIGIO */ struct selinfo tun_rsel; /* read select */ struct selinfo tun_wsel; /* write select (not used) */ }; #endif /* !_NET_IF_TUNVAR_H_ */ Index: head/sys/sys/filedesc.h =================================================================== --- head/sys/sys/filedesc.h (revision 41085) +++ head/sys/sys/filedesc.h (revision 41086) @@ -1,114 +1,144 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)filedesc.h 8.1 (Berkeley) 6/2/93 - * $Id: filedesc.h,v 1.12 1997/10/12 20:25:57 phk Exp $ + * $Id: filedesc.h,v 1.13 1997/12/05 18:58:10 bde Exp $ */ #ifndef _SYS_FILEDESC_H_ #define _SYS_FILEDESC_H_ +#include + /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. * * A process is initially started out with NDFILE descriptors stored within * this structure, selected to be enough for typical applications based on * the historical limit of 20 open files (and the usage of descriptors by * shells). If these descriptors are exhausted, a larger descriptor table * may be allocated, up to a process' resource limit; the internal arrays * are then unused. The initial expansion is set to NDEXTENT; each time * it runs out, it is doubled until the resource limit is reached. NDEXTENT * should be selected to be the biggest multiple of OFILESIZE (see below) * that will fit in a power-of-two sized piece of memory. */ #define NDFILE 20 #define NDEXTENT 50 /* 250 bytes in 256-byte alloc. */ struct filedesc { struct file **fd_ofiles; /* file structures for open files */ char *fd_ofileflags; /* per-process open file flags */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ int fd_nfiles; /* number of open files allocated */ u_short fd_lastfile; /* high-water mark of fd_ofiles */ u_short fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ u_short fd_refcnt; /* reference count */ }; /* * Basic allocation of descriptors: * one of the above, plus arrays for NDFILE descriptors. */ struct filedesc0 { struct filedesc fd_fd; /* * These arrays are used when the number of open files is * <= NDFILE, and are then pointed to by the pointers above. */ struct file *fd_dfiles[NDFILE]; char fd_dfileflags[NDFILE]; }; /* * Per-process open flags. */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ #define UF_MAPPED 0x02 /* mapped from device */ /* * Storage required per open file descriptor. 
*/ #define OFILESIZE (sizeof(struct file *) + sizeof(char)) +/* + * This structure holds the information needed to send a SIGIO or + * a SIGURG signal to a process or process group when new data arrives + * on a device or socket. The structure is placed on an SLIST belonging + * to the proc or pgrp so that the entire list may be revoked when the + * process exits or the process group disappears. + */ +struct sigio { + union { + struct proc *siu_proc; /* Process to receive SIGIO/SIGURG */ + struct pgrp *siu_pgrp; /* Process group to receive ... */ + } sio_u; + SLIST_ENTRY(sigio) sio_pgsigio; /* sigio's for process or group */ + struct sigio **sio_myref; /* location of the pointer that holds + * the reference to this structure */ + struct ucred *sio_ucred; /* Current credentials */ + uid_t sio_ruid; /* Real user id */ + pid_t sio_pgid; /* pgid for signals */ +}; +#define sio_proc sio_u.siu_proc +#define sio_pgrp sio_u.siu_pgrp + +SLIST_HEAD(sigiolst, sigio); + #ifdef KERNEL /* * Kernel global variables and routines. */ int dupfdopen __P((struct filedesc *, int, int, int, int)); int fdalloc __P((struct proc *p, int want, int *result)); int fdavail __P((struct proc *p, int n)); int falloc __P((struct proc *p, struct file **resultfp, int *resultfd)); void ffree __P((struct file *)); struct filedesc *fdinit __P((struct proc *p)); struct filedesc *fdshare __P((struct proc *p)); struct filedesc *fdcopy __P((struct proc *p)); void fdfree __P((struct proc *p)); int closef __P((struct file *fp,struct proc *p)); void fdcloseexec __P((struct proc *p)); int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); int fdissequential __P((struct file *)); void fdsequential __P((struct file *, int)); +pid_t fgetown __P((struct sigio *)); +int fsetown __P((pid_t, struct sigio **)); +void funsetown __P((struct sigio *)); +void funsetownlst __P((struct sigiolst *)); #endif #endif Index: head/sys/sys/pipe.h =================================================================== --- head/sys/sys/pipe.h (revision 41085) +++ head/sys/sys/pipe.h (revision 41086) @@ -1,115 +1,115 @@ /* * Copyright (c) 1996 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. This work was done expressly for inclusion into FreeBSD. Other use * is allowed if this notation is included. * 5. Modifications may be freely made to this file if the above conditions * are met. * - * $Id: pipe.h,v 1.9 1997/04/09 16:53:45 bde Exp $ + * $Id: pipe.h,v 1.10 1998/03/26 20:53:26 phk Exp $ */ #ifndef _SYS_PIPE_H_ #define _SYS_PIPE_H_ #ifndef KERNEL #include /* for struct timeval */ #include /* for struct selinfo */ #include /* for vm_page_t */ #include /* for PAGE_SIZE */ #endif /* * Pipe buffer size, keep moderate in value, pipes take kva space.
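
The struct sigio above is the heart of this revision: instead of each driver caching a raw pid_t, fsetown() allocates one of these, records the owner's credentials, and links it onto the owning proc's or pgrp's sigiolst so funsetownlst() can revoke every registration when the process exits or the group disappears. The sign convention rides in sio_pgid. A userland model of just that convention (the _model names are hypothetical; the real kernel functions also take the SLIST reference and perform credential checks):

	#include <sys/types.h>
	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct sigio_model {
		pid_t sio_pgid;		/* pid if > 0, -pgid if < 0 */
	};

	static int
	fsetown_model(pid_t pgid, struct sigio_model **sigiop)
	{
		if (pgid == 0) {	/* funsetown()-style teardown */
			free(*sigiop);
			*sigiop = NULL;
			return (0);
		}
		if (*sigiop == NULL) {
			*sigiop = malloc(sizeof(**sigiop));
			if (*sigiop == NULL)
				return (ENOMEM);
		}
		(*sigiop)->sio_pgid = pgid;
		return (0);
	}

	static pid_t
	fgetown_model(struct sigio_model *sigio)
	{
		/* An unset owner reads back as 0. */
		return (sigio != NULL ? sigio->sio_pgid : 0);
	}

	int
	main(void)
	{
		struct sigio_model *sigio = NULL;

		fsetown_model(1234, &sigio);	/* FIOSETOWN: process 1234 */
		printf("owner=%d\n", (int)fgetown_model(sigio));

		fsetown_model(-5678, &sigio);	/* TIOCSPGRP 5678, negated */
		printf("pgrp=%d\n", (int)-fgetown_model(sigio));

		fsetown_model(0, &sigio);
		return (0);
	}

This is why the deprecated TIOCGPGRP handlers in bpf and tun return -fgetown(): negating twice restores the old "positive means process group" view without a second field.
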
*/ #ifndef PIPE_SIZE #define PIPE_SIZE 16384 #endif #ifndef BIG_PIPE_SIZE #define BIG_PIPE_SIZE (64*1024) #endif /* * PIPE_MINDIRECT MUST be smaller than PIPE_SIZE and MUST be bigger * than PIPE_BUF. */ #ifndef PIPE_MINDIRECT #define PIPE_MINDIRECT 8192 #endif #define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1) /* * Pipe buffer information. * Separate in, out, cnt are used to simplify calculations. * Buffered write is active when the buffer.cnt field is set. */ struct pipebuf { u_int cnt; /* number of chars currently in buffer */ u_int in; /* in pointer */ u_int out; /* out pointer */ u_int size; /* size of buffer */ caddr_t buffer; /* kva of buffer */ struct vm_object *object; /* VM object containing buffer */ }; /* * Information to support direct transfers between processes for pipes. */ struct pipemapping { vm_offset_t kva; /* kernel virtual address */ vm_size_t cnt; /* number of chars in buffer */ vm_size_t pos; /* current position of transfer */ int npages; /* number of pages */ vm_page_t ms[PIPENPAGES]; /* pages in source process */ }; /* * Bits in pipe_state. */ #define PIPE_ASYNC 0x004 /* Async? I/O. */ #define PIPE_WANTR 0x008 /* Reader wants some characters. */ #define PIPE_WANTW 0x010 /* Writer wants space to put characters. */ #define PIPE_WANT 0x020 /* Pipe is wanted to be run-down. */ #define PIPE_SEL 0x040 /* Pipe has a select active. */ #define PIPE_EOF 0x080 /* Pipe is in EOF condition. */ #define PIPE_LOCK 0x100 /* Process has exclusive access to pointers/data. */ #define PIPE_LWANT 0x200 /* Process wants exclusive access to pointers/data. */ #define PIPE_DIRECTW 0x400 /* Pipe direct write active. */ #define PIPE_DIRECTOK 0x800 /* Direct mode ok. */ /* * Per-pipe data structure. * Two of these are linked together to produce bi-directional pipes. */ struct pipe { struct pipebuf pipe_buffer; /* data storage */ struct pipemapping pipe_map; /* pipe mapping for direct I/O */ struct selinfo pipe_sel; /* for compat with select */ struct timespec pipe_atime; /* time of last access */ struct timespec pipe_mtime; /* time of last modify */ struct timespec pipe_ctime; /* time of status change */ - int pipe_pgid; /* process/group for async I/O */ + struct sigio *pipe_sigio; /* information for SIGIO */ struct pipe *pipe_peer; /* link with other direction */ u_int pipe_state; /* pipe status info */ int pipe_busy; /* busy flag, mostly to handle rundown sanely */ }; #ifdef KERNEL int pipe_stat __P((struct pipe *pipe, struct stat *ub)); #endif #endif /* !_SYS_PIPE_H_ */ Index: head/sys/sys/proc.h =================================================================== --- head/sys/sys/proc.h (revision 41085) +++ head/sys/sys/proc.h (revision 41086) @@ -1,356 +1,359 @@ /*- * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 - * $Id: proc.h,v 1.58 1998/05/28 09:30:26 phk Exp $ + * $Id: proc.h,v 1.59 1998/11/09 15:08:04 truckman Exp $ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* Machine-dependent proc substruct. */ #include /* For struct callout_handle. */ #include /* For struct rtprio. */ #include /* For struct selinfo. */ #include #ifndef KERNEL #include /* For structs itimerval, timeval. */ #endif #include #include +#include /* For struct sigiolst */ /* * One structure allocated per session. */ struct session { int s_count; /* Ref cnt; pgrps in session. */ struct proc *s_leader; /* Session leader. */ struct vnode *s_ttyvp; /* Vnode of controlling terminal. */ struct tty *s_ttyp; /* Controlling terminal. */ pid_t s_sid; /* Session ID */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; /* Setlogin() name. */ }; /* * One structure allocated per process group. */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* Hash chain. */ LIST_HEAD(, proc) pg_members; /* Pointer to pgrp members. */ struct session *pg_session; /* Pointer to session. */ + struct sigiolst pg_sigiolst; /* List of sigio sources */ pid_t pg_id; /* Pgrp id. */ int pg_jobc; /* # procs qualifying pgrp for job control */ }; /* * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(PROC ONLY)" below, * which might be addressable only on a processor on which the process * is running. */ struct proc { TAILQ_ENTRY(proc) p_procq; /* run/sleep queue. */ LIST_ENTRY(proc) p_list; /* List of all processes. */ /* substructures: */ struct pcred *p_cred; /* Process owner's identity. */ struct filedesc *p_fd; /* Ptr to open files structure. */ struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */ struct plimit *p_limit; /* Process limits. 
*/ struct vm_object *p_upages_obj;/* Upages object */ struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */ #define p_ucred p_cred->pc_ucred #define p_rlimit p_limit->pl_rlimit int p_flag; /* P_* flags. */ char p_stat; /* S* process status. */ char p_pad1[3]; pid_t p_pid; /* Process identifier. */ LIST_ENTRY(proc) p_hash; /* Hash chain. */ LIST_ENTRY(proc) p_pglist; /* List of processes in pgrp. */ struct proc *p_pptr; /* Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* List of sibling processes. */ LIST_HEAD(, proc) p_children; /* Pointer to list of children. */ struct callout_handle p_ithandle; /* * Callout handle for scheduling * p_realtimer. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* Save parent pid during ptrace. XXX */ int p_dupfd; /* Sideways return value from fdopen. XXX */ struct vmspace *p_vmspace; /* Address space. */ /* scheduling */ u_int p_estcpu; /* Time averaged value of p_cpticks. */ int p_cpticks; /* Ticks of cpu time. */ fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ void *p_wchan; /* Sleep address. */ const char *p_wmesg; /* Reason for sleep. */ u_int p_swtime; /* Time swapped in or out. */ u_int p_slptime; /* Time since last blocked. */ struct itimerval p_realtimer; /* Alarm timer. */ u_int64_t p_runtime; /* Real time in microsec. */ struct timeval p_switchtime; /* When last scheduled */ u_quad_t p_uticks; /* Statclock hits in user mode. */ u_quad_t p_sticks; /* Statclock hits in system mode. */ u_quad_t p_iticks; /* Statclock hits processing intr. */ int p_traceflag; /* Kernel trace points. */ struct vnode *p_tracep; /* Trace to vnode. */ int p_siglist; /* Signals arrived but not delivered. */ struct vnode *p_textvp; /* Vnode of executable. */ char p_lock; /* Process lock (prevent swap) count. */ char p_oncpu; /* Which cpu we are on */ char p_lastcpu; /* Last cpu we were on */ char p_pad2; /* alignment */ short p_locks; /* DEBUG: lockmgr count of held locks */ short p_simple_locks; /* DEBUG: count of held simple locks */ unsigned int p_stops; /* procfs event bitmask */ unsigned int p_stype; /* procfs stop event type */ char p_step; /* procfs stop *once* flag */ unsigned char p_pfsflags; /* procfs flags */ char p_pad3[2]; /* padding for alignment */ register_t p_retval[2]; /* syscall aux returns */ + struct sigiolst p_sigiolst; /* List of sigio sources */ /* End area that is zeroed on creation. */ #define p_endzero p_startcopy /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_sigmask sigset_t p_sigmask; /* Current signal mask. */ sigset_t p_sigignore; /* Signals being ignored. */ sigset_t p_sigcatch; /* Signals being caught by user. */ u_char p_priority; /* Process priority. */ u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ char p_nice; /* Process "nice" value. */ char p_comm[MAXCOMLEN+1]; struct pgrp *p_pgrp; /* Pointer to process group. */ struct sysentvec *p_sysent; /* System call dispatch information. */ struct rtprio p_rtprio; /* Realtime priority. */ /* End area that is copied on creation. */ #define p_endcopy p_addr struct user *p_addr; /* Kernel virtual addr of u-area (PROC ONLY). */ struct mdproc p_md; /* Any machine-dependent fields. */ u_short p_xstat; /* Exit status for wait; also stop signal. */ u_short p_acflag; /* Accounting flags. */ struct rusage *p_ru; /* Exit information. 
XXX */ int p_nthreads; /* number of threads (only in leader) */ void *p_aioinfo; /* ASYNC I/O info */ int p_wakeup; /* thread id */ struct proc *p_peers; struct proc *p_leader; }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id /* Status values. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ /* These flags are kept in p_flags. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. */ #define P_INMEM 0x00004 /* Loaded into memory. */ #define P_NOCLDSTOP 0x00008 /* No SIGCHLD when children stop. */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00020 /* Has started profiling. */ #define P_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */ #define P_SINTR 0x00080 /* Sleep is interruptible. */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_TIMEOUT 0x00400 /* Timing out during sleep. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Debugging process has waited for child. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ /* Should probably be changed into a hold count. */ #define P_NOSWAP 0x08000 /* Another flag to prevent swap out. */ #define P_PHYSIO 0x10000 /* Doing physical I/O. */ /* Should be moved to machine-dependent areas. */ #define P_OWEUPC 0x20000 /* Owe process an addupc() call at next ast. */ #define P_SWAPPING 0x40000 /* Process is being swapped. */ #define P_SWAPINREQ 0x80000 /* Swapin request due to wakeup */ /* Marked a kernel thread */ #define P_KTHREADP 0x200000 /* Process is really a kernel thread */ #define P_NOCLDWAIT 0x400000 /* No zombies if child dies */ /* * MOVE TO ucred.h? * * Shareable process credentials (always resident). This includes a reference * to the current user credentials as well as real and saved ids that may be * used to change ids. */ struct pcred { struct ucred *pc_ucred; /* Current credentials. */ uid_t p_ruid; /* Real user id. */ uid_t p_svuid; /* Saved effective user id. */ gid_t p_rgid; /* Real group id. */ gid_t p_svgid; /* Saved effective group id. */ int p_refcnt; /* Number of references. */ }; #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, * as it is used to represent "no process group". 
*/ #define PID_MAX 30000 #define NO_PID 30001 #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) #define SESSHOLD(s) ((s)->s_count++) #define SESSRELE(s) { \ if (--(s)->s_count == 0) \ FREE(s, M_SESSION); \ } extern void stopevent(struct proc*, unsigned int, unsigned int); #define STOPEVENT(p,e,v) do { \ if ((p)->p_stops & (e)) stopevent(p,e,v); } while (0) /* hold process U-area in memory, normally for ptrace/procfs work */ #define PHOLD(p) { \ if ((p)->p_lock++ == 0 && ((p)->p_flag & P_INMEM) == 0) \ faultin(p); \ } #define PRELE(p) (--(p)->p_lock) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct proc *curproc; /* Current running proc. */ extern struct proc proc0; /* Process slot for swapper. */ extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern struct timeval switchtime; /* Uptime at last context switch */ LIST_HEAD(proclist, proc); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ #define NQS 32 /* 32 run queues. */ extern struct prochd qs[]; extern struct prochd rtqs[]; extern struct prochd idqs[]; extern int whichqs; /* Bit mask summary of non-empty Q's. */ extern int whichrtqs; /* Bit mask summary of non-empty Q's. */ extern int whichidqs; /* Bit mask summary of non-empty Q's. */ struct prochd { struct proc *ph_link; /* Linked list of running processes. */ struct proc *ph_rlink; }; struct proc *pfind __P((pid_t)); /* Find process by id. */ struct pgrp *pgfind __P((pid_t)); /* Find process group by id. */ struct vm_zone; extern struct vm_zone *proc_zone; int chgproccnt __P((uid_t uid, int diff)); int enterpgrp __P((struct proc *p, pid_t pgid, int mksess)); void fixjobc __P((struct proc *p, struct pgrp *pgrp, int entering)); int inferior __P((struct proc *p)); int leavepgrp __P((struct proc *p)); void mi_switch __P((void)); void procinit __P((void)); void resetpriority __P((struct proc *)); int roundrobin_interval __P((void)); void setrunnable __P((struct proc *)); void setrunqueue __P((struct proc *)); void sleepinit __P((void)); void remrq __P((struct proc *)); void cpu_switch __P((struct proc *)); void unsleep __P((struct proc *)); void wakeup_one __P((void *chan)); void cpu_exit __P((struct proc *)) __dead2; void exit1 __P((struct proc *, int)) __dead2; void cpu_fork __P((struct proc *, struct proc *)); int fork1 __P((struct proc *, int)); int trace_req __P((struct proc *)); void cpu_wait __P((struct proc *)); int cpu_coredump __P((struct proc *, struct vnode *, struct ucred *)); void setsugid __P((struct proc *p)); #endif /* KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: head/sys/sys/signalvar.h =================================================================== --- head/sys/sys/signalvar.h (revision 41085) +++ head/sys/sys/signalvar.h (revision 41086) @@ -1,178 +1,180 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)signalvar.h 8.6 (Berkeley) 2/19/95 - * $Id: signalvar.h,v 1.18 1998/03/28 10:33:23 bde Exp $ + * $Id: signalvar.h,v 1.19 1998/09/14 05:36:51 jdp Exp $ */ #ifndef _SYS_SIGNALVAR_H_ /* tmp for user.h */ #define _SYS_SIGNALVAR_H_ #include /* * Kernel signal definitions and data structures, * not exported to user programs. */ /* * Process signal actions and state, needed only within the process * (not necessarily resident). */ struct sigacts { sig_t ps_sigact[NSIG]; /* disposition of signals */ sigset_t ps_catchmask[NSIG]; /* signals to be blocked */ sigset_t ps_sigonstack; /* signals to take on sigstack */ sigset_t ps_sigintr; /* signals that interrupt syscalls */ sigset_t ps_sigreset; /* signals that reset when caught */ sigset_t ps_signodefer; /* signals not masked while handled */ sigset_t ps_oldmask; /* saved mask from before sigpause */ int ps_flags; /* signal flags, below */ struct sigaltstack ps_sigstk; /* sp & on stack state variable */ int ps_sig; /* for core dump/debugger XXX */ u_long ps_code; /* for core dump/debugger XXX */ sigset_t ps_usertramp; /* SunOS compat; libc sigtramp XXX */ }; /* signal flags */ #define SAS_OLDMASK 0x01 /* need to restore mask before pause */ #define SAS_ALTSTACK 0x02 /* have alternate signal stack */ /* additional signal action values, used only temporarily/internally */ #define SIG_CATCH ((__sighandler_t *)2) #define SIG_HOLD ((__sighandler_t *)3) /* * get signal action for process and signal; currently only for current process */ #define SIGACTION(p, sig) (p->p_sigacts->ps_sigact[(sig)]) /* * Determine signal that should be delivered to process p, the current * process, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). */ #define CURSIG(p) \ (((p)->p_siglist == 0 || \ (((p)->p_flag & P_TRACED) == 0 && \ ((p)->p_siglist & ~(p)->p_sigmask) == 0)) ? \ 0 : issignal(p)) /* * Clear a pending signal from a process. 
*/ #define CLRSIG(p, sig) { (p)->p_siglist &= ~sigmask(sig); } /* * Signal properties and actions. * The array below categorizes the signals and their default actions * according to the following properties: */ #define SA_KILL 0x01 /* terminates process by default */ #define SA_CORE 0x02 /* ditto and coredumps */ #define SA_STOP 0x04 /* suspend process */ #define SA_TTYSTOP 0x08 /* ditto, from tty */ #define SA_IGNORE 0x10 /* ignore by default */ #define SA_CONT 0x20 /* continue if suspended */ #define SA_CANTMASK 0x40 /* non-maskable, catchable */ #ifdef SIGPROP static int sigprop[NSIG + 1] = { 0, /* unused */ SA_KILL, /* SIGHUP */ SA_KILL, /* SIGINT */ SA_KILL|SA_CORE, /* SIGQUIT */ SA_KILL|SA_CORE, /* SIGILL */ SA_KILL|SA_CORE, /* SIGTRAP */ SA_KILL|SA_CORE, /* SIGABRT */ SA_KILL|SA_CORE, /* SIGEMT */ SA_KILL|SA_CORE, /* SIGFPE */ SA_KILL, /* SIGKILL */ SA_KILL|SA_CORE, /* SIGBUS */ SA_KILL|SA_CORE, /* SIGSEGV */ SA_KILL|SA_CORE, /* SIGSYS */ SA_KILL, /* SIGPIPE */ SA_KILL, /* SIGALRM */ SA_KILL, /* SIGTERM */ SA_IGNORE, /* SIGURG */ SA_STOP, /* SIGSTOP */ SA_STOP|SA_TTYSTOP, /* SIGTSTP */ SA_IGNORE|SA_CONT, /* SIGCONT */ SA_IGNORE, /* SIGCHLD */ SA_STOP|SA_TTYSTOP, /* SIGTTIN */ SA_STOP|SA_TTYSTOP, /* SIGTTOU */ SA_IGNORE, /* SIGIO */ SA_KILL, /* SIGXCPU */ SA_KILL, /* SIGXFSZ */ SA_KILL, /* SIGVTALRM */ SA_KILL, /* SIGPROF */ SA_IGNORE, /* SIGWINCH */ SA_IGNORE, /* SIGINFO */ SA_KILL, /* SIGUSR1 */ SA_KILL, /* SIGUSR2 */ }; #define contsigmask (sigmask(SIGCONT)) #define stopsigmask (sigmask(SIGSTOP) | sigmask(SIGTSTP) | \ sigmask(SIGTTIN) | sigmask(SIGTTOU)) #endif /* SIGPROP */ #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) #ifdef KERNEL struct pgrp; struct proc; +struct sigio; extern int sugid_coredump; /* Sysctl variable kern.sugid_coredump */ /* * Machine-independent functions: */ void execsigs __P((struct proc *p)); char *expand_name __P((const char*, int, int)); void gsignal __P((int pgid, int sig)); int issignal __P((struct proc *p)); void killproc __P((struct proc *p, char *why)); +void pgsigio __P((struct sigio *, int signum, int checkctty)); void pgsignal __P((struct pgrp *pgrp, int sig, int checkctty)); void postsig __P((int sig)); void psignal __P((struct proc *p, int sig)); void sigexit __P((struct proc *p, int signum)); void siginit __P((struct proc *p)); void trapsignal __P((struct proc *p, int sig, u_long code)); /* * Machine-dependent functions: */ void sendsig __P((sig_t action, int sig, int returnmask, u_long code)); #endif /* KERNEL */ #endif /* !_SYS_SIGNALVAR_H_ */ Index: head/sys/sys/socketvar.h =================================================================== --- head/sys/sys/socketvar.h (revision 41085) +++ head/sys/sys/socketvar.h (revision 41086) @@ -1,364 +1,364 @@ /*- * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 - * $Id: socketvar.h,v 1.29 1998/08/23 03:07:17 wollman Exp $ + * $Id: socketvar.h,v 1.30 1998/11/05 14:28:25 dg Exp $ */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ #include /* for TAILQ macros */ #include /* for struct selinfo */ /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ typedef u_quad_t so_gen_t; struct socket { struct vm_zone *so_zone; /* zone we were allocated from */ short so_type; /* generic type, see socket.h */ short so_options; /* from socket call, see socket.h */ short so_linger; /* time to linger while closing */ short so_state; /* internal state flags SS_*, below */ caddr_t so_pcb; /* protocol control block */ struct protosw *so_proto; /* protocol handle */ /* * Variables for connection queuing. * Socket where accepts occur is so_head in all subsidiary sockets. * If so_head is 0, socket is not related to an accept. * For head socket so_q0 queues partially completed connections, * while so_q is a queue of connections ready to be accepted. * If a connection is aborted and it has so_head set, then * it has to be pulled out of either so_q0 or so_q. * We allow connections to queue up based on current queue lengths * and limit on number of queued connections for this socket. */ struct socket *so_head; /* back pointer to accept socket */ TAILQ_HEAD(, socket) so_incomp; /* queue of partial unaccepted connections */ TAILQ_HEAD(, socket) so_comp; /* queue of complete unaccepted connections */ TAILQ_ENTRY(socket) so_list; /* list of unaccepted connections */ short so_qlen; /* number of unaccepted connections */ short so_incqlen; /* number of unaccepted incomplete connections */ short so_qlimit; /* max number queued connections */ short so_timeo; /* connection timeout */ u_short so_error; /* error affecting connection */ - pid_t so_pgid; /* pgid for signals */ + struct sigio *so_sigio; /* information for SIGIO/SIGURG */ u_long so_oobmark; /* chars to oob mark */ /* * Variables for socket buffering. 
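
so_pgid becoming so_sigio means sockets now share the same ownership machinery as the character devices above, for both SIGIO (readiness) and SIGURG (TCP urgent data). Since fcntl(F_SETOWN) and ioctl(FIOSETOWN) both funnel into fsetown(), either interface works; a small self-contained demo over a socketpair:

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <fcntl.h>
	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	static volatile sig_atomic_t got_sigio;

	static void
	handler(int sig)
	{
		got_sigio = 1;
	}

	int
	main(void)
	{
		int sv[2];
		char c;
		sigset_t mask, empty;

		if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
			perror("socketpair");
			return (1);
		}
		signal(SIGIO, handler);

		/* Block SIGIO so the sigsuspend() loop cannot miss it. */
		sigemptyset(&mask);
		sigaddset(&mask, SIGIO);
		sigprocmask(SIG_BLOCK, &mask, NULL);
		sigemptyset(&empty);

		fcntl(sv[0], F_SETOWN, getpid());	/* same fsetown() path */
		fcntl(sv[0], F_SETFL, O_ASYNC);		/* SS_ASYNC on the socket */

		write(sv[1], "x", 1);			/* readable -> SIGIO */
		while (!got_sigio)
			sigsuspend(&empty);
		read(sv[0], &c, 1);
		printf("SIGIO delivered\n");
		return (0);
	}

Note that the externalized struct xsocket below still carries a plain pid_t so_pgid, so the sysctl view of a socket keeps its old shape even though the kernel side now holds a struct sigio pointer.
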
*/ struct sockbuf { u_long sb_cc; /* actual chars in buffer */ u_long sb_hiwat; /* max actual char count */ u_long sb_mbcnt; /* chars of mbufs used */ u_long sb_mbmax; /* max chars of mbufs to use */ long sb_lowat; /* low water mark */ struct mbuf *sb_mb; /* the mbuf chain */ struct selinfo sb_sel; /* process selecting read/write */ short sb_flags; /* flags, see below */ short sb_timeo; /* timeout for read/write */ } so_rcv, so_snd; #define SB_MAX (256*1024) /* default for max chars in sockbuf */ #define SB_LOCK 0x01 /* lock on data queue */ #define SB_WANT 0x02 /* someone is waiting to lock */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ #define SB_NOINTR 0x40 /* operations not interruptible */ void (*so_upcall) __P((struct socket *, void *, int)); void *so_upcallarg; uid_t so_uid; /* who opened the socket */ /* NB: generation count must not be first; easiest to make it last. */ so_gen_t so_gencnt; /* generation count */ }; /* * Socket state bits. */ #define SS_NOFDREF 0x0001 /* no file table ref any more */ #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ #define SS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SS_RCVATMARK 0x0040 /* at mark on input */ #define SS_NBIO 0x0100 /* non-blocking ops */ #define SS_ASYNC 0x0200 /* async i/o notify */ #define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ #define SS_INCOMP 0x0800 /* unaccepted, incomplete connection */ #define SS_COMP 0x1000 /* unaccepted, complete connection */ /* * Externalized form of struct socket used by the sysctl(3) interface. */ struct xsocket { size_t xso_len; /* length of this structure */ struct socket *xso_so; /* makes a convenient handle sometimes */ short so_type; short so_options; short so_linger; short so_state; caddr_t so_pcb; /* another convenient handle */ int xso_protocol; int xso_family; short so_qlen; short so_incqlen; short so_qlimit; short so_timeo; u_short so_error; pid_t so_pgid; u_long so_oobmark; struct xsockbuf { u_long sb_cc; u_long sb_hiwat; u_long sb_mbcnt; u_long sb_mbmax; long sb_lowat; short sb_flags; short sb_timeo; } so_rcv, so_snd; uid_t so_uid; /* XXX */ }; /* * Macros for sockets and socket buffering. */ /* * Do we need to notify the other side when I/O is possible? */ #define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_UPCALL)) != 0) /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (cc > hiwat or mbcnt > mbmax). Should detect * overflow and return 0. Should use "lmin" but it doesn't exist now. */ #define sbspace(sb) \ ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \ (int)((sb)->sb_mbmax - (sb)->sb_mbcnt))) /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) /* can we read something from so? */ #define soreadable(so) \ ((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \ ((so)->so_state & SS_CANTRCVMORE) || \ (so)->so_comp.tqh_first || (so)->so_error) /* can we write something to so? 
*/ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_state & SS_CANTSENDMORE) || \ (so)->so_error) /* adjust counters in sb reflecting allocation of m */ #define sballoc(sb, m) { \ (sb)->sb_cc += (m)->m_len; \ (sb)->sb_mbcnt += MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt += (m)->m_ext.ext_size; \ } /* adjust counters in sb reflecting freeing of m */ #define sbfree(sb, m) { \ (sb)->sb_cc -= (m)->m_len; \ (sb)->sb_mbcnt -= MSIZE; \ if ((m)->m_flags & M_EXT) \ (sb)->sb_mbcnt -= (m)->m_ext.ext_size; \ } /* * Set lock on sockbuf sb; sleep if lock is already held. * Unless SB_NOINTR is set on sockbuf, sleep is interruptible. * Returns error without lock if sleep is interrupted. */ #define sblock(sb, wf) ((sb)->sb_flags & SB_LOCK ? \ (((wf) == M_WAITOK) ? sb_lock(sb) : EWOULDBLOCK) : \ ((sb)->sb_flags |= SB_LOCK), 0) /* release lock on sockbuf sb */ #define sbunlock(sb) { \ (sb)->sb_flags &= ~SB_LOCK; \ if ((sb)->sb_flags & SB_WANT) { \ (sb)->sb_flags &= ~SB_WANT; \ wakeup((caddr_t)&(sb)->sb_flags); \ } \ } #define sorwakeup(so) do { \ if (sb_notify(&(so)->so_rcv)) \ sowakeup((so), &(so)->so_rcv); \ } while (0) #define sowwakeup(so) do { \ if (sb_notify(&(so)->so_snd)) \ sowakeup((so), &(so)->so_snd); \ } while (0) #ifdef KERNEL /* * Argument structure for sosetopt et seq. This is in the KERNEL * section because it will never be visible to user code. */ enum sopt_dir { SOPT_GET, SOPT_SET }; struct sockopt { enum sopt_dir sopt_dir; /* is this a get or a set? */ int sopt_level; /* second arg of [gs]etsockopt */ int sopt_name; /* third arg of [gs]etsockopt */ void *sopt_val; /* fourth arg of [gs]etsockopt */ size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ struct proc *sopt_p; /* calling process or null if kernel */ }; struct sf_buf { SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */ int refcnt; /* reference count */ struct vm_page *m; /* currently mapped page */ vm_offset_t kva; /* va of mapping */ }; #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_SONAME); #endif extern int maxsockets; extern u_long sb_max; extern struct vm_zone *socket_zone; extern so_gen_t so_gencnt; struct file; struct filedesc; struct mbuf; struct sockaddr; struct stat; struct ucred; struct uio; /* * File operations on sockets. 
*/ int soo_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); int soo_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); int soo_stat __P((struct socket *so, struct stat *ub)); /* * From uipc_socket and friends */ struct sockaddr *dup_sockaddr __P((struct sockaddr *sa, int canwait)); int getsock __P((struct filedesc *fdp, int fdes, struct file **fpp)); int sockargs __P((struct mbuf **mp, caddr_t buf, int buflen, int type)); int getsockaddr __P((struct sockaddr **namp, caddr_t uaddr, size_t len)); void sbappend __P((struct sockbuf *sb, struct mbuf *m)); int sbappendaddr __P((struct sockbuf *sb, struct sockaddr *asa, struct mbuf *m0, struct mbuf *control)); int sbappendcontrol __P((struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)); void sbappendrecord __P((struct sockbuf *sb, struct mbuf *m0)); void sbcheck __P((struct sockbuf *sb)); void sbcompress __P((struct sockbuf *sb, struct mbuf *m, struct mbuf *n)); struct mbuf * sbcreatecontrol __P((caddr_t p, int size, int type, int level)); void sbdrop __P((struct sockbuf *sb, int len)); void sbdroprecord __P((struct sockbuf *sb)); void sbflush __P((struct sockbuf *sb)); void sbinsertoob __P((struct sockbuf *sb, struct mbuf *m0)); void sbrelease __P((struct sockbuf *sb)); int sbreserve __P((struct sockbuf *sb, u_long cc)); void sbtoxsockbuf __P((struct sockbuf *sb, struct xsockbuf *xsb)); int sbwait __P((struct sockbuf *sb)); int sb_lock __P((struct sockbuf *sb)); int soabort __P((struct socket *so)); int soaccept __P((struct socket *so, struct sockaddr **nam)); struct socket *soalloc __P((int waitok)); int sobind __P((struct socket *so, struct sockaddr *nam, struct proc *p)); void socantrcvmore __P((struct socket *so)); void socantsendmore __P((struct socket *so)); int soclose __P((struct socket *so)); int soconnect __P((struct socket *so, struct sockaddr *nam, struct proc *p)); int soconnect2 __P((struct socket *so1, struct socket *so2)); int socreate __P((int dom, struct socket **aso, int type, int proto, struct proc *p)); void sodealloc __P((struct socket *so)); int sodisconnect __P((struct socket *so)); void sofree __P((struct socket *so)); int sogetopt __P((struct socket *so, struct sockopt *sopt)); void sohasoutofband __P((struct socket *so)); void soisconnected __P((struct socket *so)); void soisconnecting __P((struct socket *so)); void soisdisconnected __P((struct socket *so)); void soisdisconnecting __P((struct socket *so)); int solisten __P((struct socket *so, int backlog, struct proc *p)); struct socket * sodropablereq __P((struct socket *head)); struct socket * sonewconn __P((struct socket *head, int connstatus)); int sooptcopyin __P((struct sockopt *sopt, void *buf, size_t len, size_t minlen)); int sooptcopyout __P((struct sockopt *sopt, void *buf, size_t len)); int sopoll __P((struct socket *so, int events, struct ucred *cred, struct proc *p)); int soreceive __P((struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)); int soreserve __P((struct socket *so, u_long sndcc, u_long rcvcc)); void sorflush __P((struct socket *so)); int sosend __P((struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct proc *p)); int sosetopt __P((struct socket *so, struct sockopt *sopt)); int soshutdown __P((struct socket *so, int how)); void sotoxsocket __P((struct socket *so, struct xsocket *xso)); void sowakeup __P((struct socket *so, struct sockbuf *sb)); #endif /* KERNEL */ 
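/*
 * Illustrative sketch (reviewer-added; not part of this diff).  With
 * so_pgid replaced by so_sigio above, asynchronous-I/O notification on a
 * socket can be funnelled through pgsigio(), whose prototype this commit
 * adds to signalvar.h.  The function below is a hedged approximation of
 * how a socket-buffer wakeup might use the new field; the name
 * sowakeup_sketch and its exact body are assumptions, not the committed
 * sowakeup().  Assumes the usual kernel headers (<sys/param.h>,
 * <sys/signalvar.h>, <sys/socketvar.h>, <sys/select.h>, <sys/mbuf.h>).
 */
static void
sowakeup_sketch(struct socket *so, struct sockbuf *sb)
{
	selwakeup(&sb->sb_sel);			/* wake select()ers */
	sb->sb_flags &= ~SB_SEL;
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;	/* wake sbwait()ers */
		wakeup((caddr_t)&sb->sb_cc);
	}
	/*
	 * SS_ASYNC is set via FIOASYNC; so_sigio records the F_SETOWN
	 * target.  pgsigio() signals a process or process group; the
	 * third argument is 0 because sockets, unlike ttys, do not
	 * require a controlling-terminal check.
	 */
	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
		pgsigio(so->so_sigio, SIGIO, 0);
	if (sb->sb_flags & SB_UPCALL)
		(*so->so_upcall)(so, so->so_upcallarg, M_DONTWAIT);
}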
#endif /* !_SYS_SOCKETVAR_H_ */ Index: head/sys/sys/tty.h =================================================================== --- head/sys/sys/tty.h (revision 41085) +++ head/sys/sys/tty.h (revision 41086) @@ -1,273 +1,274 @@ /*- * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)tty.h 8.6 (Berkeley) 1/21/94 - * $Id: tty.h,v 1.41 1998/03/07 15:36:25 bde Exp $ + * $Id: tty.h,v 1.42 1998/06/07 17:13:04 dfr Exp $ */ #ifndef _SYS_TTY_H_ #define _SYS_TTY_H_ #include #include /* For struct selinfo. */ /* * Clists are character lists, which is a variable length linked list * of cblocks, with a count of the number of characters in the list. */ struct clist { int c_cc; /* Number of characters in the clist. */ int c_cbcount; /* Number of cblocks. */ int c_cbmax; /* Max # cblocks allowed for this clist. */ int c_cbreserved; /* # cblocks reserved for this clist. */ char *c_cf; /* Pointer to the first cblock. */ char *c_cl; /* Pointer to the last cblock. */ }; /* * Per-tty structure. * * Should be split in two, into device and tty drivers. * Glue could be masks of what to echo and circular buffer * (low, high, timeout). */ struct tty { struct clist t_rawq; /* Device raw input queue. */ long t_rawcc; /* Raw input queue statistics. */ struct clist t_canq; /* Device canonical queue. */ long t_cancc; /* Canonical queue statistics. */ struct clist t_outq; /* Device output queue. */ long t_outcc; /* Output queue statistics. */ int t_line; /* Interface to device drivers. */ dev_t t_dev; /* Device. 
*/ int t_state; /* Device and driver (TS*) state. */ int t_flags; /* Tty flags. */ int t_timeout; /* Timeout for ttywait() */ struct pgrp *t_pgrp; /* Foreground process group. */ struct session *t_session; /* Enclosing session. */ + struct sigio *t_sigio; /* information for SIGIO */ struct selinfo t_rsel; /* Tty read/oob select. */ struct selinfo t_wsel; /* Tty write select. */ struct termios t_termios; /* Termios state. */ struct winsize t_winsize; /* Window size. */ /* Start output. */ void (*t_oproc) __P((struct tty *)); /* Stop output. */ void (*t_stop) __P((struct tty *, int)); /* Set hardware state. */ int (*t_param) __P((struct tty *, struct termios *)); void *t_sc; /* XXX: net/if_sl.c:sl_softc. */ int t_column; /* Tty output column. */ int t_rocount, t_rocol; /* Tty. */ int t_ififosize; /* Total size of upstream fifos. */ int t_ihiwat; /* High water mark for input. */ int t_ilowat; /* Low water mark for input. */ speed_t t_ispeedwat; /* t_ispeed override for watermarks. */ int t_ohiwat; /* High water mark for output. */ int t_olowat; /* Low water mark for output. */ speed_t t_ospeedwat; /* t_ospeed override for watermarks. */ int t_gen; /* Generation number. */ }; #define t_cc t_termios.c_cc #define t_cflag t_termios.c_cflag #define t_iflag t_termios.c_iflag #define t_ispeed t_termios.c_ispeed #define t_lflag t_termios.c_lflag #define t_min t_termios.c_min #define t_oflag t_termios.c_oflag #define t_ospeed t_termios.c_ospeed #define t_time t_termios.c_time #define TTIPRI 25 /* Sleep priority for tty reads. */ #define TTOPRI 26 /* Sleep priority for tty writes. */ /* * User data unfortunately has to be copied through buffers on the way to * and from clists. The buffers are on the stack so their sizes must be * fairly small. */ #define IBUFSIZ 384 /* Should be >= max value of MIN. */ #define OBUFSIZ 100 #ifndef TTYHOG #define TTYHOG 1024 #endif #ifdef KERNEL #define TTMAXHIWAT roundup(2048, CBSIZE) #define TTMINHIWAT roundup(100, CBSIZE) #define TTMAXLOWAT 256 #define TTMINLOWAT 32 #endif /* These flags are kept in t_state. */ #define TS_SO_OLOWAT 0x00001 /* Wake up when output <= low water. */ #define TS_ASYNC 0x00002 /* Tty in async I/O mode. */ #define TS_BUSY 0x00004 /* Draining output. */ #define TS_CARR_ON 0x00008 /* Carrier is present. */ #define TS_FLUSH 0x00010 /* Outq has been flushed during DMA. */ #define TS_ISOPEN 0x00020 /* Open has completed. */ #define TS_TBLOCK 0x00040 /* Further input blocked. */ #define TS_TIMEOUT 0x00080 /* Wait for output char processing. */ #define TS_TTSTOP 0x00100 /* Output paused. */ #ifdef notyet #define TS_WOPEN 0x00200 /* Open in progress. */ #endif #define TS_XCLUDE 0x00400 /* Tty requires exclusivity. */ /* State for intra-line fancy editing work. */ #define TS_BKSL 0x00800 /* State for lowercase \ work. */ #define TS_CNTTB 0x01000 /* Counting tab width, ignore FLUSHO. */ #define TS_ERASE 0x02000 /* Within a \.../ for PRTRUB. */ #define TS_LNCH 0x04000 /* Next character is literal. */ #define TS_TYPEN 0x08000 /* Retyping suspended input (PENDIN). */ #define TS_LOCAL (TS_BKSL | TS_CNTTB | TS_ERASE | TS_LNCH | TS_TYPEN) /* Extras. */ #define TS_CAN_BYPASS_L_RINT 0x010000 /* Device in "raw" mode. */ #define TS_CONNECTED 0x020000 /* Connection open. */ #define TS_SNOOP 0x040000 /* Device is being snooped on. */ #define TS_SO_OCOMPLETE 0x080000 /* Wake up when output completes. */ #define TS_ZOMBIE 0x100000 /* Connection lost. */ /* Hardware flow-control-invoked bits. */ #define TS_CAR_OFLOW 0x200000 /* For MDMBUF (XXX handle in driver). 
*/ #ifdef notyet #define TS_CTS_OFLOW 0x400000 /* For CCTS_OFLOW. */ #define TS_DSR_OFLOW 0x800000 /* For CDSR_OFLOW. */ #endif /* Character type information. */ #define ORDINARY 0 #define CONTROL 1 #define BACKSPACE 2 #define NEWLINE 3 #define TAB 4 #define VTAB 5 #define RETURN 6 struct speedtab { int sp_speed; /* Speed. */ int sp_code; /* Code. */ }; /* Modem control commands (driver). */ #define DMSET 0 #define DMBIS 1 #define DMBIC 2 #define DMGET 3 /* Flags on a character passed to ttyinput. */ #define TTY_CHARMASK 0x000000ff /* Character mask */ #define TTY_QUOTE 0x00000100 /* Character quoted */ #define TTY_ERRORMASK 0xff000000 /* Error mask */ #define TTY_FE 0x01000000 /* Framing error */ #define TTY_PE 0x02000000 /* Parity error */ #define TTY_OE 0x04000000 /* Overrun error */ #define TTY_BI 0x08000000 /* Break condition */ /* Is tp controlling terminal for p? */ #define isctty(p, tp) \ ((p)->p_session == (tp)->t_session && (p)->p_flag & P_CONTROLT) /* Is p in background of tp? */ #define isbackground(p, tp) \ (isctty((p), (tp)) && (p)->p_pgrp != (tp)->t_pgrp) /* Unique sleep addresses. */ #define TSA_CARR_ON(tp) ((void *)&(tp)->t_rawq) #define TSA_HUP_OR_INPUT(tp) ((void *)&(tp)->t_rawq.c_cf) #define TSA_OCOMPLETE(tp) ((void *)&(tp)->t_outq.c_cl) #define TSA_OLOWAT(tp) ((void *)&(tp)->t_outq) #define TSA_PTC_READ(tp) ((void *)&(tp)->t_outq.c_cf) #define TSA_PTC_WRITE(tp) ((void *)&(tp)->t_rawq.c_cl) #define TSA_PTS_READ(tp) ((void *)&(tp)->t_canq) #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_TTYS); #endif extern struct tty *constty; /* Temporary virtual console. */ int b_to_q __P((char *cp, int cc, struct clist *q)); void catq __P((struct clist *from, struct clist *to)); void clist_alloc_cblocks __P((struct clist *q, int ccmax, int ccres)); void clist_free_cblocks __P((struct clist *q)); /* void clist_init __P((void)); */ /* defined in systm.h for main() */ int getc __P((struct clist *q)); void ndflush __P((struct clist *q, int cc)); int ndqb __P((struct clist *q, int flag)); char *nextc __P((struct clist *q, char *cp, int *c)); int putc __P((int c, struct clist *q)); int q_to_b __P((struct clist *q, char *cp, int cc)); int unputc __P((struct clist *q)); int ttcompat __P((struct tty *tp, u_long com, caddr_t data, int flag)); int ttsetcompat __P((struct tty *tp, u_long *com, caddr_t data, struct termios *term)); void termioschars __P((struct termios *t)); int tputchar __P((int c, struct tty *tp)); int ttioctl __P((struct tty *tp, u_long com, void *data, int flag)); int ttread __P((struct tty *tp, struct uio *uio, int flag)); void ttrstrt __P((void *tp)); int ttypoll __P((struct tty *tp, int events, struct proc *p)); int ttpoll __P((dev_t dev, int events, struct proc *p)); void ttsetwater __P((struct tty *tp)); int ttspeedtab __P((int speed, struct speedtab *table)); int ttstart __P((struct tty *tp)); void ttwakeup __P((struct tty *tp)); int ttwrite __P((struct tty *tp, struct uio *uio, int flag)); void ttwwakeup __P((struct tty *tp)); void ttyblock __P((struct tty *tp)); void ttychars __P((struct tty *tp)); int ttycheckoutq __P((struct tty *tp, int wait)); int ttyclose __P((struct tty *tp)); void ttyflush __P((struct tty *tp, int rw)); void ttyinfo __P((struct tty *tp)); int ttyinput __P((int c, struct tty *tp)); int ttylclose __P((struct tty *tp, int flag)); int ttymodem __P((struct tty *tp, int flag)); int ttyopen __P((dev_t device, struct tty *tp)); int ttysleep __P((struct tty *tp, void *chan, int pri, char *wmesg, int timeout)); int ttywait __P((struct tty *tp)); 
struct tty *ttymalloc __P((void)); void ttyfree __P((struct tty *)); #endif /* KERNEL */ #endif /* !_SYS_TTY_H_ */
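/*
 * Illustrative sketches (reviewer-added; not part of this diff).
 *
 * 1) Kernel side: with t_sigio now in struct tty, a tty input wakeup can
 *    hand SIGIO delivery to pgsigio() as well.  The nonzero third
 *    argument asks pgsigio() to honour the controlling-terminal check,
 *    which ttys want and sockets do not.  The name ttwakeup_sketch and
 *    the body are hedged assumptions, not the committed ttwakeup().
 */
static void
ttwakeup_sketch(struct tty *tp)
{
	if (tp->t_rsel.si_pid != 0)
		selwakeup(&tp->t_rsel);		/* wake readers in select() */
	if ((tp->t_state & TS_ASYNC) && tp->t_sigio != NULL)
		pgsigio(tp->t_sigio, SIGIO, 1);	/* 1 => check ctty */
	wakeup(TSA_HUP_OR_INPUT(tp));		/* wake readers in ttysleep() */
}

/*
 * 2) User side: the observable effect of centralizing ownership in
 *    struct sigio is that F_SETOWN/F_GETOWN and SIGIO delivery behave
 *    uniformly across descriptor types.  A hedged user-level fragment
 *    showing the conventional setup:
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t io_ready;

static void
sigio_handler(int sig)
{
	io_ready = 1;			/* do the read() from the main loop */
}

static int
enable_async_io(int fd)
{
	int flags;

	if (signal(SIGIO, sigio_handler) == SIG_ERR)
		return (-1);
	if (fcntl(fd, F_SETOWN, getpid()) == -1)	/* deliver SIGIO to us */
		return (-1);
	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
		return (-1);
	return (fcntl(fd, F_SETFL, flags | O_ASYNC));	/* FASYNC mode */
}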