Index: head/sys/kern/sysv_msg.c
===================================================================
--- head/sys/kern/sysv_msg.c	(revision 122200)
+++ head/sys/kern/sysv_msg.c	(revision 122201)
@@ -1,1151 +1,1151 @@
/*
 * Implementation of SVID messages
 *
 * Author: Daniel Boulet
 *
 * Copyright 1993 Daniel Boulet and RTMX Inc.
 *
 * This system call was implemented by Daniel Boulet under contract from RTMX.
 *
 * Redistribution and use in source forms, with and without modification,
 * are permitted provided that this entire comment appears intact.
 *
 * Redistribution in binary form may occur without any restrictions.
 * Obviously, it would be nice if you gave credit where credit is due
 * but requiring it would be too onerous.
 *
 * This software is provided ``AS IS'' without any warranties of any kind.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sysvipc.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");

static void msginit(void);
static int msgunload(void);
static int sysvmsg_modload(struct module *, int, void *);

#ifdef MSG_DEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)
#endif

static void msg_freehdr(struct msg *msghdr);

/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)msgctl, (sy_call_t *)msgget,
	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
};

struct msg {
	struct msg *msg_next;	/* next msg in the chain */
	long	msg_type;	/* type of this message */
				/* >0 -> type of this message */
				/* 0 -> free header */
	u_short	msg_ts;		/* size of this message */
	short	msg_spot;	/* location of start of msg in buffer */
};

#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must be less than 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40
#endif
#ifndef MSGTQL
#define MSGTQL	40
#endif

/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panic's if it isn't).
 */
struct msginfo msginfo = {
	MSGMAX,		/* max chars in a message */
	MSGMNI,		/* # of message queue identifiers */
	MSGMNB,		/* max chars in a queue */
	MSGTQL,		/* max messages in system */
	MSGSSZ,		/* size of a message segment */
			/* (must be small power of 2 greater than 4) */
	MSGSEG		/* number of message segments */
};

/*
 * macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 */
#define MSQID(ix,ds)	((ix) & 0xffff | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)

/*
 * The rest of this file is specific to this particular implementation.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> available */
				/* 0..(MSGSEG-1) -> index of next segment */
};

#define MSG_LOCKED	01000	/* Is this msqid_ds locked?
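/*
 * Illustrative userland sketch (not part of this commit): how an array
 * index and the per-slot sequence number pack into the identifier that
 * MSQID() builds and MSQID_IX()/MSQID_SEQ() take apart.  The function
 * name msqid_pack is hypothetical.
 */
#include <assert.h>

static int
msqid_pack(int ix, int seq)
{
	/* low 16 bits: slot index; high 16 bits: generation counter */
	return ((ix & 0xffff) | ((seq << 16) & 0xffff0000));
}

int
main(void)
{
	int id = msqid_pack(7, 42);

	assert((id & 0xffff) == 7);		/* MSQID_IX() */
	assert(((id >> 16) & 0xffff) == 42);	/* MSQID_SEQ() */
	return (0);
}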
*/ static int nfree_msgmaps; /* # of free map entries */ static short free_msgmaps; /* head of linked list of free map entries */ static struct msg *free_msghdrs;/* list of free msg headers */ static char *msgpool; /* MSGMAX byte long msg buffer pool */ static struct msgmap *msgmaps; /* MSGSEG msgmap structures */ static struct msg *msghdrs; /* MSGTQL msg headers */ static struct msqid_ds *msqids; /* MSGMNI msqid_ds struct's */ static struct mtx msq_mtx; /* global mutex for message queues. */ static void msginit() { register int i; TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg); TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz); msginfo.msgmax = msginfo.msgseg * msginfo.msgssz; TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni); msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK); if (msgpool == NULL) panic("msgpool is NULL"); msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK); if (msgmaps == NULL) panic("msgmaps is NULL"); msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK); if (msghdrs == NULL) panic("msghdrs is NULL"); msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK); if (msqids == NULL) panic("msqids is NULL"); /* * msginfo.msgssz should be a power of two for efficiency reasons. * It is also pretty silly if msginfo.msgssz is less than 8 * or greater than about 256 so ... */ i = 8; while (i < 1024 && i != msginfo.msgssz) i <<= 1; if (i != msginfo.msgssz) { DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz, msginfo.msgssz)); panic("msginfo.msgssz not a small power of 2"); } if (msginfo.msgseg > 32767) { DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg)); panic("msginfo.msgseg > 32767"); } if (msgmaps == NULL) panic("msgmaps is NULL"); for (i = 0; i < msginfo.msgseg; i++) { if (i > 0) msgmaps[i-1].next = i; msgmaps[i].next = -1; /* implies entry is available */ } free_msgmaps = 0; nfree_msgmaps = msginfo.msgseg; if (msghdrs == NULL) panic("msghdrs is NULL"); for (i = 0; i < msginfo.msgtql; i++) { msghdrs[i].msg_type = 0; if (i > 0) msghdrs[i-1].msg_next = &msghdrs[i]; msghdrs[i].msg_next = NULL; } free_msghdrs = &msghdrs[0]; if (msqids == NULL) panic("msqids is NULL"); for (i = 0; i < msginfo.msgmni; i++) { msqids[i].msg_qbytes = 0; /* implies entry is available */ msqids[i].msg_perm.seq = 0; /* reset to a known value */ msqids[i].msg_perm.mode = 0; } mtx_init(&msq_mtx, "msq", NULL, MTX_DEF); } static int msgunload() { struct msqid_ds *msqptr; int msqid; for (msqid = 0; msqid < msginfo.msgmni; msqid++) { /* * Look for an unallocated and unlocked msqid_ds. * msqid_ds's can be locked by msgsnd or msgrcv while * they are copying the message in/out. We can't * re-use the entry until they release it. 
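/*
 * Standalone sketch (not part of this commit) of the msgssz sanity
 * check msginit() performs above: only a power of two between 8 and
 * 1024 inclusive is accepted.  The helper name is hypothetical.
 */
static int
msgssz_is_valid(int msgssz)
{
	int i;

	for (i = 8; i <= 1024; i <<= 1)
		if (i == msgssz)
			return (1);
	return (0);
}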
*/ msqptr = &msqids[msqid]; if (msqptr->msg_qbytes != 0 || (msqptr->msg_perm.mode & MSG_LOCKED) != 0) break; } if (msqid != msginfo.msgmni) return (EBUSY); free(msgpool, M_MSG); free(msgmaps, M_MSG); free(msghdrs, M_MSG); free(msqids, M_MSG); mtx_destroy(&msq_mtx); return (0); } static int sysvmsg_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: msginit(); break; case MOD_UNLOAD: error = msgunload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } static moduledata_t sysvmsg_mod = { "sysvmsg", &sysvmsg_modload, NULL }; SYSCALL_MODULE_HELPER(msgsys); SYSCALL_MODULE_HELPER(msgctl); SYSCALL_MODULE_HELPER(msgget); SYSCALL_MODULE_HELPER(msgsnd); SYSCALL_MODULE_HELPER(msgrcv); DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST); MODULE_VERSION(sysvmsg, 1); /* * Entry point for all MSG calls * * MPSAFE */ int msgsys(td, uap) struct thread *td; /* XXX actually varargs. */ struct msgsys_args /* { int which; int a2; int a3; int a4; int a5; int a6; } */ *uap; { int error; if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); if (uap->which < 0 || uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0])) return (EINVAL); error = (*msgcalls[uap->which])(td, &uap->a2); return (error); } static void msg_freehdr(msghdr) struct msg *msghdr; { while (msghdr->msg_ts > 0) { short next; if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg) panic("msghdr->msg_spot out of range"); next = msgmaps[msghdr->msg_spot].next; msgmaps[msghdr->msg_spot].next = free_msgmaps; free_msgmaps = msghdr->msg_spot; nfree_msgmaps++; msghdr->msg_spot = next; if (msghdr->msg_ts >= msginfo.msgssz) msghdr->msg_ts -= msginfo.msgssz; else msghdr->msg_ts = 0; } if (msghdr->msg_spot != -1) panic("msghdr->msg_spot != -1"); msghdr->msg_next = free_msghdrs; free_msghdrs = msghdr; } #ifndef _SYS_SYSPROTO_H_ struct msgctl_args { int msqid; int cmd; struct msqid_ds *buf; }; #endif /* * MPSAFE */ int msgctl(td, uap) struct thread *td; register struct msgctl_args *uap; { int msqid = uap->msqid; int cmd = uap->cmd; struct msqid_ds *user_msqptr = uap->buf; int rval, error; struct msqid_ds msqbuf; register struct msqid_ds *msqptr; DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); msqid = IPCID_TO_IX(msqid); if (msqid < 0 || msqid >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid, msginfo.msgmni)); return (EINVAL); } if (cmd == IPC_SET && (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0) return (error); msqptr = &msqids[msqid]; mtx_lock(&msq_mtx); if (msqptr->msg_qbytes == 0) { DPRINTF(("no such msqid\n")); error = EINVAL; goto done2; } if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } error = 0; rval = 0; switch (cmd) { case IPC_RMID: { struct msg *msghdr; if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M))) goto done2; /* Free the message headers */ msghdr = msqptr->msg_first; while (msghdr != NULL) { struct msg *msghdr_tmp; /* Free the segments of each message */ msqptr->msg_cbytes -= msghdr->msg_ts; msqptr->msg_qnum--; msghdr_tmp = msghdr; msghdr = msghdr->msg_next; msg_freehdr(msghdr_tmp); } if (msqptr->msg_cbytes != 0) panic("msg_cbytes is screwed up"); if (msqptr->msg_qnum != 0) panic("msg_qnum is screwed up"); msqptr->msg_qbytes = 0; /* Mark it as free */ wakeup(msqptr); } break; case IPC_SET: if ((error = ipcperm(td, 
&msqptr->msg_perm, IPC_M))) goto done2; if (msqbuf.msg_qbytes > msqptr->msg_qbytes) { error = suser(td); if (error) goto done2; } if (msqbuf.msg_qbytes > msginfo.msgmnb) { DPRINTF(("can't increase msg_qbytes beyond %d" "(truncating)\n", msginfo.msgmnb)); msqbuf.msg_qbytes = msginfo.msgmnb; /* silently restrict qbytes to system limit */ } if (msqbuf.msg_qbytes == 0) { DPRINTF(("can't reduce msg_qbytes to 0\n")); error = EINVAL; /* non-standard errno! */ goto done2; } msqptr->msg_perm.uid = msqbuf.msg_perm.uid; /* change the owner */ msqptr->msg_perm.gid = msqbuf.msg_perm.gid; /* change the owner */ msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) | (msqbuf.msg_perm.mode & 0777); msqptr->msg_qbytes = msqbuf.msg_qbytes; msqptr->msg_ctime = time_second; break; case IPC_STAT: if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) { DPRINTF(("requester doesn't have read access\n")); goto done2; } break; default: DPRINTF(("invalid command %d\n", cmd)); error = EINVAL; goto done2; } if (error == 0) td->td_retval[0] = rval; done2: mtx_unlock(&msq_mtx); if (cmd == IPC_STAT && error == 0) error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds)); return(error); } #ifndef _SYS_SYSPROTO_H_ struct msgget_args { key_t key; int msgflg; }; #endif /* * MPSAFE */ int msgget(td, uap) struct thread *td; register struct msgget_args *uap; { int msqid, error = 0; int key = uap->key; int msgflg = uap->msgflg; struct ucred *cred = td->td_ucred; register struct msqid_ds *msqptr = NULL; DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&msq_mtx); if (key != IPC_PRIVATE) { for (msqid = 0; msqid < msginfo.msgmni; msqid++) { msqptr = &msqids[msqid]; if (msqptr->msg_qbytes != 0 && msqptr->msg_perm.key == key) break; } if (msqid < msginfo.msgmni) { DPRINTF(("found public key\n")); if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) { DPRINTF(("not exclusive\n")); error = EEXIST; goto done2; } - if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700 ))) { + if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700))) { DPRINTF(("requester doesn't have 0%o access\n", msgflg & 0700)); goto done2; } goto found; } } DPRINTF(("need to allocate the msqid_ds\n")); if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) { for (msqid = 0; msqid < msginfo.msgmni; msqid++) { /* * Look for an unallocated and unlocked msqid_ds. * msqid_ds's can be locked by msgsnd or msgrcv while * they are copying the message in/out. We can't * re-use the entry until they release it. 
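/*
 * Userland sketch (not part of this commit) of the msgget() semantics
 * implemented above: an IPC_CREAT|IPC_EXCL request on an existing key
 * fails with EEXIST, and the low nine mode bits are checked against
 * the queue's ipc_perm.  The key value 0x1234 is arbitrary.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <errno.h>
#include <stdio.h>

int
main(void)
{
	int id;

	id = msgget(0x1234, IPC_CREAT | 0600);
	if (id == -1) {
		perror("msgget");
		return (1);
	}
	/* A second exclusive create on the same key must fail. */
	if (msgget(0x1234, IPC_CREAT | IPC_EXCL | 0600) == -1 &&
	    errno == EEXIST)
		printf("queue %d already exists, as expected\n", id);
	return (0);
}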
*/ msqptr = &msqids[msqid]; if (msqptr->msg_qbytes == 0 && (msqptr->msg_perm.mode & MSG_LOCKED) == 0) break; } if (msqid == msginfo.msgmni) { DPRINTF(("no more msqid_ds's available\n")); error = ENOSPC; goto done2; } DPRINTF(("msqid %d is available\n", msqid)); msqptr->msg_perm.key = key; msqptr->msg_perm.cuid = cred->cr_uid; msqptr->msg_perm.uid = cred->cr_uid; msqptr->msg_perm.cgid = cred->cr_gid; msqptr->msg_perm.gid = cred->cr_gid; msqptr->msg_perm.mode = (msgflg & 0777); /* Make sure that the returned msqid is unique */ msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff; msqptr->msg_first = NULL; msqptr->msg_last = NULL; msqptr->msg_cbytes = 0; msqptr->msg_qnum = 0; msqptr->msg_qbytes = msginfo.msgmnb; msqptr->msg_lspid = 0; msqptr->msg_lrpid = 0; msqptr->msg_stime = 0; msqptr->msg_rtime = 0; msqptr->msg_ctime = time_second; } else { DPRINTF(("didn't find it and wasn't asked to create it\n")); error = ENOENT; goto done2; } found: /* Construct the unique msqid */ td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm); done2: mtx_unlock(&msq_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct msgsnd_args { int msqid; const void *msgp; size_t msgsz; int msgflg; }; #endif /* * MPSAFE */ int msgsnd(td, uap) struct thread *td; register struct msgsnd_args *uap; { int msqid = uap->msqid; const void *user_msgp = uap->msgp; size_t msgsz = uap->msgsz; int msgflg = uap->msgflg; int segs_needed, error = 0; register struct msqid_ds *msqptr; register struct msg *msghdr; short next; DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz, msgflg)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&msq_mtx); msqid = IPCID_TO_IX(msqid); if (msqid < 0 || msqid >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid, msginfo.msgmni)); error = EINVAL; goto done2; } msqptr = &msqids[msqid]; if (msqptr->msg_qbytes == 0) { DPRINTF(("no such message queue id\n")); error = EINVAL; goto done2; } if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) { DPRINTF(("requester doesn't have write access\n")); goto done2; } segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz; DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz, segs_needed)); for (;;) { int need_more_resources = 0; /* * check msgsz * (inside this loop in case msg_qbytes changes while we sleep) */ if (msgsz > msqptr->msg_qbytes) { DPRINTF(("msgsz > msqptr->msg_qbytes\n")); error = EINVAL; goto done2; } if (msqptr->msg_perm.mode & MSG_LOCKED) { DPRINTF(("msqid is locked\n")); need_more_resources = 1; } if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) { DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n")); need_more_resources = 1; } if (segs_needed > nfree_msgmaps) { DPRINTF(("segs_needed > nfree_msgmaps\n")); need_more_resources = 1; } if (free_msghdrs == NULL) { DPRINTF(("no more msghdrs\n")); need_more_resources = 1; } if (need_more_resources) { int we_own_it; if ((msgflg & IPC_NOWAIT) != 0) { DPRINTF(("need more resources but caller " "doesn't want to wait\n")); error = EAGAIN; goto done2; } if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) { DPRINTF(("we don't own the msqid_ds\n")); we_own_it = 0; } else { /* Force later arrivals to wait for our request */ DPRINTF(("we own the msqid_ds\n")); msqptr->msg_perm.mode |= MSG_LOCKED; we_own_it = 1; } DPRINTF(("goodnight\n")); error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH, 
"msgwait", 0); DPRINTF(("good morning, error=%d\n", error)); if (we_own_it) msqptr->msg_perm.mode &= ~MSG_LOCKED; if (error != 0) { DPRINTF(("msgsnd: interrupted system call\n")); error = EINTR; goto done2; } /* * Make sure that the msq queue still exists */ if (msqptr->msg_qbytes == 0) { DPRINTF(("msqid deleted\n")); error = EIDRM; goto done2; } } else { DPRINTF(("got all the resources that we need\n")); break; } } /* * We have the resources that we need. * Make sure! */ if (msqptr->msg_perm.mode & MSG_LOCKED) panic("msg_perm.mode & MSG_LOCKED"); if (segs_needed > nfree_msgmaps) panic("segs_needed > nfree_msgmaps"); if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) panic("msgsz + msg_cbytes > msg_qbytes"); if (free_msghdrs == NULL) panic("no more msghdrs"); /* * Re-lock the msqid_ds in case we page-fault when copying in the * message */ if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) panic("msqid_ds is already locked"); msqptr->msg_perm.mode |= MSG_LOCKED; /* * Allocate a message header */ msghdr = free_msghdrs; free_msghdrs = msghdr->msg_next; msghdr->msg_spot = -1; msghdr->msg_ts = msgsz; /* * Allocate space for the message */ while (segs_needed > 0) { if (nfree_msgmaps <= 0) panic("not enough msgmaps"); if (free_msgmaps == -1) panic("nil free_msgmaps"); next = free_msgmaps; if (next <= -1) panic("next too low #1"); if (next >= msginfo.msgseg) panic("next out of range #1"); DPRINTF(("allocating segment %d to message\n", next)); free_msgmaps = msgmaps[next].next; nfree_msgmaps--; msgmaps[next].next = msghdr->msg_spot; msghdr->msg_spot = next; segs_needed--; } /* * Copy in the message type */ mtx_unlock(&msq_mtx); if ((error = copyin(user_msgp, &msghdr->msg_type, sizeof(msghdr->msg_type))) != 0) { mtx_lock(&msq_mtx); DPRINTF(("error %d copying the message type\n", error)); msg_freehdr(msghdr); msqptr->msg_perm.mode &= ~MSG_LOCKED; wakeup(msqptr); goto done2; } mtx_lock(&msq_mtx); user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type); /* * Validate the message type */ if (msghdr->msg_type < 1) { msg_freehdr(msghdr); msqptr->msg_perm.mode &= ~MSG_LOCKED; wakeup(msqptr); DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type)); error = EINVAL; goto done2; } /* * Copy in the message body */ next = msghdr->msg_spot; while (msgsz > 0) { size_t tlen; if (msgsz > msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz; if (next <= -1) panic("next too low #2"); if (next >= msginfo.msgseg) panic("next out of range #2"); mtx_unlock(&msq_mtx); if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz], tlen)) != 0) { mtx_lock(&msq_mtx); DPRINTF(("error %d copying in message segment\n", error)); msg_freehdr(msghdr); msqptr->msg_perm.mode &= ~MSG_LOCKED; wakeup(msqptr); goto done2; } mtx_lock(&msq_mtx); msgsz -= tlen; user_msgp = (const char *)user_msgp + tlen; next = msgmaps[next].next; } if (next != -1) panic("didn't use all the msg segments"); /* * We've got the message. Unlock the msqid_ds. */ msqptr->msg_perm.mode &= ~MSG_LOCKED; /* * Make sure that the msqid_ds is still allocated. 
*/ if (msqptr->msg_qbytes == 0) { msg_freehdr(msghdr); wakeup(msqptr); error = EIDRM; goto done2; } /* * Put the message into the queue */ if (msqptr->msg_first == NULL) { msqptr->msg_first = msghdr; msqptr->msg_last = msghdr; } else { msqptr->msg_last->msg_next = msghdr; msqptr->msg_last = msghdr; } msqptr->msg_last->msg_next = NULL; msqptr->msg_cbytes += msghdr->msg_ts; msqptr->msg_qnum++; msqptr->msg_lspid = td->td_proc->p_pid; msqptr->msg_stime = time_second; wakeup(msqptr); td->td_retval[0] = 0; done2: mtx_unlock(&msq_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct msgrcv_args { int msqid; void *msgp; size_t msgsz; long msgtyp; int msgflg; }; #endif /* * MPSAFE */ int msgrcv(td, uap) struct thread *td; register struct msgrcv_args *uap; { int msqid = uap->msqid; void *user_msgp = uap->msgp; size_t msgsz = uap->msgsz; long msgtyp = uap->msgtyp; int msgflg = uap->msgflg; size_t len; register struct msqid_ds *msqptr; register struct msg *msghdr; int error = 0; short next; DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp, msgsz, msgtyp, msgflg)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); msqid = IPCID_TO_IX(msqid); if (msqid < 0 || msqid >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid, msginfo.msgmni)); return (EINVAL); } msqptr = &msqids[msqid]; mtx_lock(&msq_mtx); if (msqptr->msg_qbytes == 0) { DPRINTF(("no such message queue id\n")); error = EINVAL; goto done2; } if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) { DPRINTF(("requester doesn't have read access\n")); goto done2; } msghdr = NULL; while (msghdr == NULL) { if (msgtyp == 0) { msghdr = msqptr->msg_first; if (msghdr != NULL) { if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { DPRINTF(("first message on the queue " "is too big (want %d, got %d)\n", msgsz, msghdr->msg_ts)); error = E2BIG; goto done2; } if (msqptr->msg_first == msqptr->msg_last) { msqptr->msg_first = NULL; msqptr->msg_last = NULL; } else { msqptr->msg_first = msghdr->msg_next; if (msqptr->msg_first == NULL) panic("msg_first/last screwed up #1"); } } } else { struct msg *previous; struct msg **prev; previous = NULL; prev = &(msqptr->msg_first); while ((msghdr = *prev) != NULL) { /* * Is this message's type an exact match or is * this message's type less than or equal to * the absolute value of a negative msgtyp? * Note that the second half of this test can * NEVER be true if msgtyp is positive since * msg_type is always positive! */ if (msgtyp == msghdr->msg_type || msghdr->msg_type <= -msgtyp) { DPRINTF(("found message type %d, " "requested %d\n", msghdr->msg_type, msgtyp)); if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { DPRINTF(("requested message " "on the queue is too big " "(want %d, got %d)\n", msgsz, msghdr->msg_ts)); error = E2BIG; goto done2; } *prev = msghdr->msg_next; if (msghdr == msqptr->msg_last) { if (previous == NULL) { if (prev != &msqptr->msg_first) panic("msg_first/last screwed up #2"); msqptr->msg_first = NULL; msqptr->msg_last = NULL; } else { if (prev == &msqptr->msg_first) panic("msg_first/last screwed up #3"); msqptr->msg_last = previous; } } break; } previous = msghdr; prev = &(msghdr->msg_next); } } /* * We've either extracted the msghdr for the appropriate * message or there isn't one. * If there is one then bail out of this loop. */ if (msghdr != NULL) break; /* * Hmph! No message found. 
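/*
 * Userland sketch (not part of this commit) of the type matching
 * implemented above: msgtyp == 0 takes the first message, msgtyp > 0
 * demands an exact type match, and msgtyp < 0 takes the first message
 * whose type is <= -msgtyp.  struct mymsg and the helper name are
 * illustrative.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct mymsg {
	long	mtype;
	char	mtext[64];
};

static int
recv_low_type(int id)
{
	struct mymsg m;

	/* Accept any message of type 1..4; truncate oversized bodies. */
	return (msgrcv(id, &m, sizeof(m.mtext), -4, MSG_NOERROR));
}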
Does the user want to wait? */ if ((msgflg & IPC_NOWAIT) != 0) { DPRINTF(("no appropriate message found (msgtyp=%d)\n", msgtyp)); /* The SVID says to return ENOMSG. */ error = ENOMSG; goto done2; } /* * Wait for something to happen */ DPRINTF(("msgrcv: goodnight\n")); error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH, "msgwait", 0); DPRINTF(("msgrcv: good morning (error=%d)\n", error)); if (error != 0) { DPRINTF(("msgsnd: interrupted system call\n")); error = EINTR; goto done2; } /* * Make sure that the msq queue still exists */ if (msqptr->msg_qbytes == 0 || msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) { DPRINTF(("msqid deleted\n")); error = EIDRM; goto done2; } } /* * Return the message to the user. * * First, do the bookkeeping (before we risk being interrupted). */ msqptr->msg_cbytes -= msghdr->msg_ts; msqptr->msg_qnum--; msqptr->msg_lrpid = td->td_proc->p_pid; msqptr->msg_rtime = time_second; /* * Make msgsz the actual amount that we'll be returning. * Note that this effectively truncates the message if it is too long * (since msgsz is never increased). */ DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz, msghdr->msg_ts)); if (msgsz > msghdr->msg_ts) msgsz = msghdr->msg_ts; /* * Return the type to the user. */ mtx_unlock(&msq_mtx); error = copyout(&(msghdr->msg_type), user_msgp, sizeof(msghdr->msg_type)); mtx_lock(&msq_mtx); if (error != 0) { DPRINTF(("error (%d) copying out message type\n", error)); msg_freehdr(msghdr); wakeup(msqptr); goto done2; } user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type); /* * Return the segments to the user */ next = msghdr->msg_spot; for (len = 0; len < msgsz; len += msginfo.msgssz) { size_t tlen; if (msgsz - len > msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz - len; if (next <= -1) panic("next too low #3"); if (next >= msginfo.msgseg) panic("next out of range #3"); mtx_unlock(&msq_mtx); error = copyout(&msgpool[next * msginfo.msgssz], user_msgp, tlen); mtx_lock(&msq_mtx); if (error != 0) { DPRINTF(("error (%d) copying out message segment\n", error)); msg_freehdr(msghdr); wakeup(msqptr); goto done2; } user_msgp = (char *)user_msgp + tlen; next = msgmaps[next].next; } /* * Done, return the actual number of bytes copied out. */ msg_freehdr(msghdr); wakeup(msqptr); td->td_retval[0] = msgsz; done2: mtx_unlock(&msq_mtx); return (error); } static int sysctl_msqids(SYSCTL_HANDLER_ARGS) { return (SYSCTL_OUT(req, msqids, sizeof(struct msqid_ds) * msginfo.msgmni)); } SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, ""); SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD, NULL, 0, sysctl_msqids, "", "Message queue IDs"); Index: head/sys/kern/sysv_sem.c =================================================================== --- head/sys/kern/sysv_sem.c (revision 122200) +++ head/sys/kern/sysv_sem.c (revision 122201) @@ -1,1230 +1,1230 @@ /* * Implementation of SVID semaphores * * Author: Daniel Boulet * * This software is provided ``AS IS'' without any warranties of any kind. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); #ifdef SEM_DEBUG #define DPRINTF(a) printf a #else #define DPRINTF(a) #endif static void seminit(void); static int sysvsem_modload(struct module *, int, void *); static int semunload(void); static void semexit_myhook(void *arg, struct proc *p); static int sysctl_sema(SYSCTL_HANDLER_ARGS); static int semvalid(int semid, struct semid_ds *semaptr); #ifndef _SYS_SYSPROTO_H_ struct __semctl_args; int __semctl(struct thread *td, struct __semctl_args *uap); struct semget_args; int semget(struct thread *td, struct semget_args *uap); struct semop_args; int semop(struct thread *td, struct semop_args *uap); #endif static struct sem_undo *semu_alloc(struct thread *td); -static int semundo_adjust(struct thread *td, struct sem_undo **supptr, +static int semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid, int semnum, int adjval); static void semundo_clear(int semid, int semnum); /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *semcalls[] = { (sy_call_t *)__semctl, (sy_call_t *)semget, (sy_call_t *)semop }; static struct mtx sem_mtx; /* semaphore global lock */ static int semtot = 0; static struct semid_ds *sema; /* semaphore id pool */ static struct mtx *sema_mtx; /* semaphore id pool mutexes*/ static struct sem *sem; /* semaphore pool */ SLIST_HEAD(, sem_undo) semu_list; /* list of active undo structures */ static int *semu; /* undo structure pool */ static eventhandler_tag semexit_tag; #define SEMUNDO_MTX sem_mtx #define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX); #define SEMUNDO_UNLOCK() mtx_unlock(&SEMUNDO_MTX); #define SEMUNDO_LOCKASSERT(how) mtx_assert(&SEMUNDO_MTX, (how)); struct sem { u_short semval; /* semaphore value */ pid_t sempid; /* pid of last operation */ u_short semncnt; /* # awaiting semval > cval */ u_short semzcnt; /* # awaiting semval = 0 */ }; /* * Undo structure (one per process) */ struct sem_undo { SLIST_ENTRY(sem_undo) un_next; /* ptr to next active undo structure */ struct proc *un_proc; /* owner of this structure */ short un_cnt; /* # of active entries */ struct undo { short un_adjval; /* adjust on exit values */ short un_num; /* semaphore # */ int un_id; /* semid */ } un_ent[1]; /* undo entries */ }; /* * Configuration parameters */ #ifndef SEMMNI #define SEMMNI 10 /* # of semaphore identifiers */ #endif #ifndef SEMMNS #define SEMMNS 60 /* # of semaphores in system */ #endif #ifndef SEMUME #define SEMUME 10 /* max # of undo entries per process */ #endif #ifndef SEMMNU #define SEMMNU 30 /* # of undo structures in system */ #endif /* shouldn't need tuning */ #ifndef SEMMAP #define SEMMAP 30 /* # of entries in semaphore map */ #endif #ifndef SEMMSL #define SEMMSL SEMMNS /* max # of semaphores per id */ #endif #ifndef SEMOPM #define SEMOPM 100 /* max # of operations per semop call */ #endif #define SEMVMX 32767 /* semaphore maximum value */ #define SEMAEM 16384 /* adjust on exit max value */ /* * Due to the way semaphore memory is allocated, we have to ensure that * SEMUSZ is properly aligned. 
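/*
 * Standalone sketch (not part of this commit) of what SEM_ALIGN()
 * below computes: round a byte count up to the next multiple of
 * sizeof(long) with mask arithmetic.  The helper name is hypothetical.
 */
#include <assert.h>
#include <stddef.h>

static size_t
align_up(size_t bytes)
{
	return ((bytes + sizeof(long) - 1) & ~(sizeof(long) - 1));
}

int
main(void)
{
	assert(align_up(1) == sizeof(long));
	assert(align_up(sizeof(long)) == sizeof(long));
	assert(align_up(sizeof(long) + 1) == 2 * sizeof(long));
	return (0);
}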
*/ #define SEM_ALIGN(bytes) (((bytes) + (sizeof(long) - 1)) & ~(sizeof(long) - 1)) /* actual size of an undo structure */ #define SEMUSZ SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME])) /* * Macro to find a particular sem_undo vector */ #define SEMU(ix) \ ((struct sem_undo *)(((intptr_t)semu)+ix * seminfo.semusz)) /* * semaphore info struct */ struct seminfo seminfo = { SEMMAP, /* # of entries in semaphore map */ SEMMNI, /* # of semaphore identifiers */ SEMMNS, /* # of semaphores in system */ SEMMNU, /* # of undo structures in system */ SEMMSL, /* max # of semaphores per id */ SEMOPM, /* max # of operations per semop call */ SEMUME, /* max # of undo entries per process */ SEMUSZ, /* size in bytes of undo structure */ SEMVMX, /* semaphore maximum value */ SEMAEM /* adjust on exit max value */ }; SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, OID_AUTO, semmap, CTLFLAG_RW, &seminfo.semmap, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RDTUN, &seminfo.semmni, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RDTUN, &seminfo.semmns, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RDTUN, &seminfo.semmnu, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RW, &seminfo.semmsl, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RDTUN, &seminfo.semopm, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RDTUN, &seminfo.semume, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RDTUN, &seminfo.semusz, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RW, &seminfo.semvmx, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RW, &seminfo.semaem, 0, ""); SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLFLAG_RD, NULL, 0, sysctl_sema, "", ""); static void seminit(void) { int i; TUNABLE_INT_FETCH("kern.ipc.semmap", &seminfo.semmap); TUNABLE_INT_FETCH("kern.ipc.semmni", &seminfo.semmni); TUNABLE_INT_FETCH("kern.ipc.semmns", &seminfo.semmns); TUNABLE_INT_FETCH("kern.ipc.semmnu", &seminfo.semmnu); TUNABLE_INT_FETCH("kern.ipc.semmsl", &seminfo.semmsl); TUNABLE_INT_FETCH("kern.ipc.semopm", &seminfo.semopm); TUNABLE_INT_FETCH("kern.ipc.semume", &seminfo.semume); TUNABLE_INT_FETCH("kern.ipc.semusz", &seminfo.semusz); TUNABLE_INT_FETCH("kern.ipc.semvmx", &seminfo.semvmx); TUNABLE_INT_FETCH("kern.ipc.semaem", &seminfo.semaem); sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK); sema = malloc(sizeof(struct semid_ds) * seminfo.semmni, M_SEM, M_WAITOK); sema_mtx = malloc(sizeof(struct mtx) * seminfo.semmni, M_SEM, M_WAITOK | M_ZERO); semu = malloc(seminfo.semmnu * seminfo.semusz, M_SEM, M_WAITOK); for (i = 0; i < seminfo.semmni; i++) { sema[i].sem_base = 0; sema[i].sem_perm.mode = 0; } for (i = 0; i < seminfo.semmni; i++) mtx_init(&sema_mtx[i], "semid", NULL, MTX_DEF); for (i = 0; i < seminfo.semmnu; i++) { struct sem_undo *suptr = SEMU(i); suptr->un_proc = NULL; } SLIST_INIT(&semu_list); mtx_init(&sem_mtx, "sem", NULL, MTX_DEF); semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL, EVENTHANDLER_PRI_ANY); } static int semunload(void) { int i; if (semtot != 0) return (EBUSY); EVENTHANDLER_DEREGISTER(process_exit, semexit_tag); free(sem, M_SEM); free(sema, M_SEM); free(semu, M_SEM); for (i = 0; i < seminfo.semmni; i++) mtx_destroy(&sema_mtx[i]); mtx_destroy(&sem_mtx); return (0); } static int sysvsem_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: seminit(); break; case MOD_UNLOAD: error = semunload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } 
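/*
 * Sketch (not part of this commit) of the SEMU() indexing used above:
 * the undo pool is an array of variable-size records of
 * seminfo.semusz bytes each, so slot ix starts ix * recsz bytes past
 * the pool base.  Names are illustrative.
 */
#include <stddef.h>
#include <stdint.h>

static void *
rec_at(void *pool, int ix, size_t recsz)
{
	return ((void *)((uintptr_t)pool + (size_t)ix * recsz));
}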
static moduledata_t sysvsem_mod = { "sysvsem", &sysvsem_modload, NULL }; SYSCALL_MODULE_HELPER(semsys); SYSCALL_MODULE_HELPER(__semctl); SYSCALL_MODULE_HELPER(semget); SYSCALL_MODULE_HELPER(semop); DECLARE_MODULE(sysvsem, sysvsem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST); MODULE_VERSION(sysvsem, 1); /* * Entry point for all SEM calls * * MPSAFE */ int semsys(td, uap) struct thread *td; /* XXX actually varargs. */ struct semsys_args /* { int which; int a2; int a3; int a4; int a5; } */ *uap; { int error; if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); if (uap->which < 0 || uap->which >= sizeof(semcalls)/sizeof(semcalls[0])) return (EINVAL); error = (*semcalls[uap->which])(td, &uap->a2); return (error); } /* * Allocate a new sem_undo structure for a process * (returns ptr to structure or NULL if no more room) */ static struct sem_undo * semu_alloc(td) struct thread *td; { int i; struct sem_undo *suptr; struct sem_undo **supptr; int attempt; SEMUNDO_LOCKASSERT(MA_OWNED); /* * Try twice to allocate something. * (we'll purge any empty structures after the first pass so * two passes are always enough) */ for (attempt = 0; attempt < 2; attempt++) { /* * Look for a free structure. * Fill it in and return it if we find one. */ for (i = 0; i < seminfo.semmnu; i++) { suptr = SEMU(i); if (suptr->un_proc == NULL) { SLIST_INSERT_HEAD(&semu_list, suptr, un_next); suptr->un_cnt = 0; suptr->un_proc = td->td_proc; return(suptr); } } /* * We didn't find a free one, if this is the first attempt * then try to free some structures. */ if (attempt == 0) { /* All the structures are in use - try to free some */ int did_something = 0; SLIST_FOREACH_PREVPTR(suptr, supptr, &semu_list, un_next) { if (suptr->un_cnt == 0) { suptr->un_proc = NULL; did_something = 1; *supptr = SLIST_NEXT(suptr, un_next); } } /* If we didn't free anything then just give-up */ if (!did_something) return(NULL); } else { /* * The second pass failed even though we freed * something after the first pass! * This is IMPOSSIBLE! */ panic("semu_alloc - second attempt failed"); } } return (NULL); } /* * Adjust a particular entry for a particular proc */ static int semundo_adjust(td, supptr, semid, semnum, adjval) struct thread *td; struct sem_undo **supptr; int semid, semnum; int adjval; { struct proc *p = td->td_proc; struct sem_undo *suptr; struct undo *sunptr; int i; SEMUNDO_LOCKASSERT(MA_OWNED); /* Look for and remember the sem_undo if the caller doesn't provide it */ suptr = *supptr; if (suptr == NULL) { SLIST_FOREACH(suptr, &semu_list, un_next) { if (suptr->un_proc == p) { *supptr = suptr; break; } } if (suptr == NULL) { if (adjval == 0) return(0); suptr = semu_alloc(td); if (suptr == NULL) return(ENOSPC); *supptr = suptr; } } /* * Look for the requested entry and adjust it (delete if adjval becomes * 0). 
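/*
 * Sketch (not part of this commit) of the unordered-array removal
 * idiom semundo_adjust() uses below when an adjustment reaches zero:
 * move the last entry into the vacated slot and shrink the count.
 * Simplified to an int array.
 */
static void
remove_entry(int *ent, int *cnt, int i)
{
	(*cnt)--;
	if (i < *cnt)
		ent[i] = ent[*cnt];	/* order is not preserved */
}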
*/ sunptr = &suptr->un_ent[0]; for (i = 0; i < suptr->un_cnt; i++, sunptr++) { if (sunptr->un_id != semid || sunptr->un_num != semnum) continue; if (adjval != 0) { adjval += sunptr->un_adjval; if (adjval > seminfo.semaem || adjval < -seminfo.semaem) return (ERANGE); } sunptr->un_adjval = adjval; if (sunptr->un_adjval == 0) { suptr->un_cnt--; if (i < suptr->un_cnt) suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; } return(0); } /* Didn't find the right entry - create it */ if (adjval == 0) return(0); if (adjval > seminfo.semaem || adjval < -seminfo.semaem) return (ERANGE); if (suptr->un_cnt != seminfo.semume) { sunptr = &suptr->un_ent[suptr->un_cnt]; suptr->un_cnt++; sunptr->un_adjval = adjval; sunptr->un_id = semid; sunptr->un_num = semnum; } else return(EINVAL); return(0); } static void semundo_clear(semid, semnum) int semid, semnum; { struct sem_undo *suptr; SEMUNDO_LOCKASSERT(MA_OWNED); SLIST_FOREACH(suptr, &semu_list, un_next) { struct undo *sunptr = &suptr->un_ent[0]; int i = 0; while (i < suptr->un_cnt) { if (sunptr->un_id == semid) { if (semnum == -1 || sunptr->un_num == semnum) { suptr->un_cnt--; if (i < suptr->un_cnt) { suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; continue; } } if (semnum != -1) break; } i++, sunptr++; } } } static int semvalid(semid, semaptr) int semid; struct semid_ds *semaptr; { return ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || semaptr->sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0); } /* * Note that the user-mode half of this passes a union, not a pointer */ #ifndef _SYS_SYSPROTO_H_ struct __semctl_args { int semid; int semnum; int cmd; union semun *arg; }; #endif /* * MPSAFE */ int __semctl(td, uap) struct thread *td; struct __semctl_args *uap; { int semid = uap->semid; int semnum = uap->semnum; int cmd = uap->cmd; u_short *array; union semun *arg = uap->arg; union semun real_arg; struct ucred *cred = td->td_ucred; int i, rval, error; struct semid_ds sbuf; struct semid_ds *semaptr; struct mtx *sema_mtxp; u_short usval, count; DPRINTF(("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); array = NULL; switch(cmd) { case SEM_STAT: if (semid < 0 || semid >= seminfo.semmni) return (EINVAL); if ((error = copyin(arg, &real_arg, sizeof(real_arg))) != 0) return (error); semaptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; mtx_lock(sema_mtxp); - if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ) { + if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) { error = EINVAL; goto done2; } if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; mtx_unlock(sema_mtxp); error = copyout(semaptr, real_arg.buf, sizeof(struct semid_ds)); rval = IXSEQ_TO_IPCID(semid,semaptr->sem_perm); if (error == 0) td->td_retval[0] = rval; return (error); } semid = IPCID_TO_IX(semid); if (semid < 0 || semid >= seminfo.semmni) return (EINVAL); semaptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; error = 0; rval = 0; switch (cmd) { case IPC_RMID: mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_M))) goto done2; semaptr->sem_perm.cuid = cred->cr_uid; semaptr->sem_perm.uid = cred->cr_uid; semtot -= semaptr->sem_nsems; for (i = semaptr->sem_base - sem; i < semtot; i++) sem[i] = sem[i + semaptr->sem_nsems]; for (i = 0; i < seminfo.semmni; i++) { if ((sema[i].sem_perm.mode & SEM_ALLOC) && sema[i].sem_base > semaptr->sem_base) sema[i].sem_base -= semaptr->sem_nsems; } semaptr->sem_perm.mode = 0; SEMUNDO_LOCK(); semundo_clear(semid, 
-1); SEMUNDO_UNLOCK(); wakeup(semaptr); break; case IPC_SET: if ((error = copyin(arg, &real_arg, sizeof(real_arg))) != 0) goto done2; if ((error = copyin(real_arg.buf, &sbuf, sizeof(sbuf))) != 0) goto done2; mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_M))) goto done2; semaptr->sem_perm.uid = sbuf.sem_perm.uid; semaptr->sem_perm.gid = sbuf.sem_perm.gid; semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) | (sbuf.sem_perm.mode & 0777); semaptr->sem_ctime = time_second; break; case IPC_STAT: if ((error = copyin(arg, &real_arg, sizeof(real_arg))) != 0) goto done2; mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; sbuf = *semaptr; mtx_unlock(sema_mtxp); error = copyout(semaptr, real_arg.buf, sizeof(struct semid_ds)); break; case GETNCNT: mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semaptr->sem_nsems) { error = EINVAL; goto done2; } rval = semaptr->sem_base[semnum].semncnt; break; case GETPID: mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semaptr->sem_nsems) { error = EINVAL; goto done2; } rval = semaptr->sem_base[semnum].sempid; break; case GETVAL: mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semaptr->sem_nsems) { error = EINVAL; goto done2; } rval = semaptr->sem_base[semnum].semval; break; case GETALL: if ((error = copyin(arg, &real_arg, sizeof(real_arg))) != 0) goto done2; array = malloc(sizeof(*array) * semaptr->sem_nsems, M_TEMP, M_WAITOK); mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; for (i = 0; i < semaptr->sem_nsems; i++) array[i] = semaptr->sem_base[i].semval; mtx_unlock(sema_mtxp); error = copyout(array, real_arg.array, i * sizeof(real_arg.array[0])); break; case GETZCNT: mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semaptr->sem_nsems) { error = EINVAL; goto done2; } rval = semaptr->sem_base[semnum].semzcnt; break; case SETVAL: if ((error = copyin(arg, &real_arg, sizeof(real_arg))) != 0) goto done2; mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; if ((error = ipcperm(td, &semaptr->sem_perm, IPC_W))) goto done2; if (semnum < 0 || semnum >= semaptr->sem_nsems) { error = EINVAL; goto done2; } if (real_arg.val < 0 || real_arg.val > seminfo.semvmx) { error = ERANGE; goto done2; } semaptr->sem_base[semnum].semval = real_arg.val; SEMUNDO_LOCK(); semundo_clear(semid, semnum); SEMUNDO_UNLOCK(); wakeup(semaptr); break; case SETALL: mtx_lock(sema_mtxp); raced: if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; count = semaptr->sem_nsems; mtx_unlock(sema_mtxp); if ((error = copyin(arg, &real_arg, sizeof(real_arg))) != 0) goto done2; array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); copyin(real_arg.array, array, count * sizeof(*array)); if (error) break; mtx_lock(sema_mtxp); if ((error = semvalid(uap->semid, semaptr)) != 0) goto done2; 
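/*
 * Userland sketch (not part of this commit) for the SETVAL/GETVAL
 * paths above.  On FreeBSD union semun comes from <sys/sem.h>; on some
 * other systems the caller must define it.  SETVAL is range-checked
 * against kern.ipc.semvmx.
 */
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

static int
set_then_get(int id)
{
	union semun arg;

	arg.val = 1;
	if (semctl(id, 0, SETVAL, arg) == -1)
		return (-1);
	return (semctl(id, 0, GETVAL));
}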
/* we could have raced? */ if (count != semaptr->sem_nsems) { free(array, M_TEMP); array = NULL; goto raced; } if ((error = ipcperm(td, &semaptr->sem_perm, IPC_W))) goto done2; for (i = 0; i < semaptr->sem_nsems; i++) { usval = array[i]; if (usval > seminfo.semvmx) { error = ERANGE; break; } semaptr->sem_base[i].semval = usval; } SEMUNDO_LOCK(); semundo_clear(semid, -1); SEMUNDO_UNLOCK(); wakeup(semaptr); break; default: error = EINVAL; break; } if (error == 0) td->td_retval[0] = rval; done2: if (mtx_owned(sema_mtxp)) mtx_unlock(sema_mtxp); if (array != NULL) free(array, M_TEMP); return(error); } #ifndef _SYS_SYSPROTO_H_ struct semget_args { key_t key; int nsems; int semflg; }; #endif /* * MPSAFE */ int semget(td, uap) struct thread *td; struct semget_args *uap; { int semid, error = 0; int key = uap->key; int nsems = uap->nsems; int semflg = uap->semflg; struct ucred *cred = td->td_ucred; DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&Giant); if (key != IPC_PRIVATE) { for (semid = 0; semid < seminfo.semmni; semid++) { if ((sema[semid].sem_perm.mode & SEM_ALLOC) && sema[semid].sem_perm.key == key) break; } if (semid < seminfo.semmni) { DPRINTF(("found public key\n")); if ((error = ipcperm(td, &sema[semid].sem_perm, semflg & 0700))) { goto done2; } if (nsems > 0 && sema[semid].sem_nsems < nsems) { DPRINTF(("too small\n")); error = EINVAL; goto done2; } if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { DPRINTF(("not exclusive\n")); error = EEXIST; goto done2; } goto found; } } DPRINTF(("need to allocate the semid_ds\n")); if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { if (nsems <= 0 || nsems > seminfo.semmsl) { DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems, seminfo.semmsl)); error = EINVAL; goto done2; } if (nsems > seminfo.semmns - semtot) { DPRINTF(( "not enough semaphores left (need %d, got %d)\n", nsems, seminfo.semmns - semtot)); error = ENOSPC; goto done2; } for (semid = 0; semid < seminfo.semmni; semid++) { if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0) break; } if (semid == seminfo.semmni) { DPRINTF(("no more semid_ds's available\n")); error = ENOSPC; goto done2; } DPRINTF(("semid %d is available\n", semid)); sema[semid].sem_perm.key = key; sema[semid].sem_perm.cuid = cred->cr_uid; sema[semid].sem_perm.uid = cred->cr_uid; sema[semid].sem_perm.cgid = cred->cr_gid; sema[semid].sem_perm.gid = cred->cr_gid; sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC; sema[semid].sem_perm.seq = (sema[semid].sem_perm.seq + 1) & 0x7fff; sema[semid].sem_nsems = nsems; sema[semid].sem_otime = 0; sema[semid].sem_ctime = time_second; sema[semid].sem_base = &sem[semtot]; semtot += nsems; bzero(sema[semid].sem_base, sizeof(sema[semid].sem_base[0])*nsems); DPRINTF(("sembase = 0x%x, next = 0x%x\n", sema[semid].sem_base, &sem[semtot])); } else { DPRINTF(("didn't find it and wasn't asked to create it\n")); error = ENOENT; goto done2; } found: td->td_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm); done2: mtx_unlock(&Giant); return (error); } #ifndef _SYS_SYSPROTO_H_ struct semop_args { int semid; struct sembuf *sops; size_t nsops; }; #endif /* * MPSAFE */ int semop(td, uap) struct thread *td; struct semop_args *uap; { int semid = uap->semid; size_t nsops = uap->nsops; struct sembuf *sops; struct semid_ds *semaptr; struct sembuf *sopptr = 0; struct sem *semptr = 0; struct sem_undo *suptr; struct mtx *sema_mtxp; size_t i, j, k; int error; int do_wakeup, do_undos; DPRINTF(("call to semop(%d, 0x%x, 
%u)\n", semid, sops, nsops)); if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ if (semid < 0 || semid >= seminfo.semmni) return (EINVAL); /* Allocate memory for sem_ops */ if (nsops > seminfo.semopm) { DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm, nsops)); return (E2BIG); } sops = malloc(nsops * sizeof(sops[0]), M_SEM, M_WAITOK); if ((error = copyin(uap->sops, sops, nsops * sizeof(sops[0]))) != 0) { DPRINTF(("error = %d from copyin(%08x, %08x, %d)\n", error, uap->sops, sops, nsops * sizeof(sops[0]))); free(sops, M_SEM); return (error); } semaptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; mtx_lock(sema_mtxp); if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) { error = EINVAL; goto done2; } if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { error = EINVAL; goto done2; } /* * Initial pass thru sops to see what permissions are needed. * Also perform any checks that don't need repeating on each * attempt to satisfy the request vector. */ j = 0; /* permission needed */ do_undos = 0; for (i = 0; i < nsops; i++) { sopptr = &sops[i]; if (sopptr->sem_num >= semaptr->sem_nsems) { error = EFBIG; goto done2; } if (sopptr->sem_flg & SEM_UNDO && sopptr->sem_op != 0) do_undos = 1; j |= (sopptr->sem_op == 0) ? SEM_R : SEM_A; } if ((error = ipcperm(td, &semaptr->sem_perm, j))) { DPRINTF(("error = %d from ipaccess\n", error)); goto done2; } /* * Loop trying to satisfy the vector of requests. * If we reach a point where we must wait, any requests already * performed are rolled back and we go to sleep until some other * process wakes us up. At this point, we start all over again. * * This ensures that from the perspective of other tasks, a set * of requests is atomic (never partially satisfied). */ for (;;) { do_wakeup = 0; error = 0; /* error return if necessary */ for (i = 0; i < nsops; i++) { sopptr = &sops[i]; semptr = &semaptr->sem_base[sopptr->sem_num]; DPRINTF(( "semop: semaptr=%x, sem_base=%x, " "semptr=%x, sem[%d]=%d : op=%d, flag=%s\n", semaptr, semaptr->sem_base, semptr, sopptr->sem_num, semptr->semval, sopptr->sem_op, (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait")); if (sopptr->sem_op < 0) { if (semptr->semval + sopptr->sem_op < 0) { DPRINTF(("semop: can't do it now\n")); break; } else { semptr->semval += sopptr->sem_op; if (semptr->semval == 0 && semptr->semzcnt > 0) do_wakeup = 1; } } else if (sopptr->sem_op == 0) { if (semptr->semval != 0) { DPRINTF(("semop: not zero now\n")); break; } } else if (semptr->semval + sopptr->sem_op > seminfo.semvmx) { error = ERANGE; break; } else { if (semptr->semncnt > 0) do_wakeup = 1; semptr->semval += sopptr->sem_op; } } /* * Did we get through the entire vector? */ if (i >= nsops) goto done; /* * No ... rollback anything that we've already done */ DPRINTF(("semop: rollback 0 through %d\n", i-1)); for (j = 0; j < i; j++) semaptr->sem_base[sops[j].sem_num].semval -= sops[j].sem_op; /* If we detected an error, return it */ if (error != 0) goto done2; /* * If the request that we couldn't satisfy has the * NOWAIT flag set then return with EAGAIN. 
*/ if (sopptr->sem_flg & IPC_NOWAIT) { error = EAGAIN; goto done2; } if (sopptr->sem_op == 0) semptr->semzcnt++; else semptr->semncnt++; DPRINTF(("semop: good night!\n")); error = msleep(semaptr, sema_mtxp, (PZERO - 4) | PCATCH, "semwait", 0); DPRINTF(("semop: good morning (error=%d)!\n", error)); if (error != 0) { error = EINTR; goto done2; } DPRINTF(("semop: good morning!\n")); /* * Make sure that the semaphore still exists */ if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { error = EIDRM; goto done2; } /* * The semaphore is still alive. Readjust the count of * waiting processes. */ if (sopptr->sem_op == 0) semptr->semzcnt--; else semptr->semncnt--; } done: /* * Process any SEM_UNDO requests. */ if (do_undos) { SEMUNDO_LOCK(); suptr = NULL; for (i = 0; i < nsops; i++) { /* * We only need to deal with SEM_UNDO's for non-zero * op's. */ int adjval; if ((sops[i].sem_flg & SEM_UNDO) == 0) continue; adjval = sops[i].sem_op; if (adjval == 0) continue; error = semundo_adjust(td, &suptr, semid, sops[i].sem_num, -adjval); if (error == 0) continue; /* * Oh-Oh! We ran out of either sem_undo's or undo's. * Rollback the adjustments to this point and then * rollback the semaphore ups and down so we can return * with an error with all structures restored. We * rollback the undo's in the exact reverse order that * we applied them. This guarantees that we won't run * out of space as we roll things back out. */ for (j = 0; j < i; j++) { k = i - j - 1; if ((sops[k].sem_flg & SEM_UNDO) == 0) continue; adjval = sops[k].sem_op; if (adjval == 0) continue; if (semundo_adjust(td, &suptr, semid, sops[k].sem_num, adjval) != 0) panic("semop - can't undo undos"); } for (j = 0; j < nsops; j++) semaptr->sem_base[sops[j].sem_num].semval -= sops[j].sem_op; DPRINTF(("error = %d from semundo_adjust\n", error)); SEMUNDO_UNLOCK(); goto done2; } /* loop through the sops */ SEMUNDO_UNLOCK(); } /* if (do_undos) */ /* We're definitely done - set the sempid's and time */ for (i = 0; i < nsops; i++) { sopptr = &sops[i]; semptr = &semaptr->sem_base[sopptr->sem_num]; semptr->sempid = td->td_proc->p_pid; } semaptr->sem_otime = time_second; /* * Do a wakeup if any semaphore was up'd whilst something was * sleeping on it. */ if (do_wakeup) { DPRINTF(("semop: doing wakeup\n")); wakeup(semaptr); DPRINTF(("semop: back from wakeup\n")); } DPRINTF(("semop: done\n")); td->td_retval[0] = 0; done2: mtx_unlock(sema_mtxp); free(sops, M_SEM); return (error); } /* * Go through the undo structures for this process and apply the adjustments to * semaphores. */ static void semexit_myhook(arg, p) void *arg; struct proc *p; { struct sem_undo *suptr; struct sem_undo **supptr; /* * Go through the chain of undo vectors looking for one * associated with this process. */ SEMUNDO_LOCK(); SLIST_FOREACH_PREVPTR(suptr, supptr, &semu_list, un_next) { if (suptr->un_proc == p) break; } SEMUNDO_UNLOCK(); if (suptr == NULL) return; DPRINTF(("proc @%08x has undo structure with %d entries\n", p, suptr->un_cnt)); /* * If there are any active undo elements then process them. 
*/ if (suptr->un_cnt > 0) { int ix; for (ix = 0; ix < suptr->un_cnt; ix++) { int semid = suptr->un_ent[ix].un_id; int semnum = suptr->un_ent[ix].un_num; int adjval = suptr->un_ent[ix].un_adjval; struct semid_ds *semaptr; struct mtx *sema_mtxp; semaptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; mtx_lock(sema_mtxp); SEMUNDO_LOCK(); if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) panic("semexit - semid not allocated"); if (semnum >= semaptr->sem_nsems) panic("semexit - semnum out of range"); DPRINTF(( "semexit: %08x id=%d num=%d(adj=%d) ; sem=%d\n", suptr->un_proc, suptr->un_ent[ix].un_id, suptr->un_ent[ix].un_num, suptr->un_ent[ix].un_adjval, semaptr->sem_base[semnum].semval)); if (adjval < 0) { if (semaptr->sem_base[semnum].semval < -adjval) semaptr->sem_base[semnum].semval = 0; else semaptr->sem_base[semnum].semval += adjval; } else semaptr->sem_base[semnum].semval += adjval; wakeup(semaptr); DPRINTF(("semexit: back from wakeup\n")); mtx_unlock(sema_mtxp); SEMUNDO_UNLOCK(); } } /* * Deallocate the undo vector. */ DPRINTF(("removing vector\n")); suptr->un_proc = NULL; *supptr = SLIST_NEXT(suptr, un_next); } static int sysctl_sema(SYSCTL_HANDLER_ARGS) { return (SYSCTL_OUT(req, sema, sizeof(struct semid_ds) * seminfo.semmni)); } Index: head/sys/kern/sysv_shm.c =================================================================== --- head/sys/kern/sysv_shm.c (revision 122200) +++ head/sys/kern/sysv_shm.c (revision 122201) @@ -1,953 +1,953 @@ /* $NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $ */ /* * Copyright (c) 1994 Adam Glass and Charles Hannum. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Adam Glass and Charles * Hannum. * 4. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments"); struct oshmctl_args; static int oshmctl(struct thread *td, struct oshmctl_args *uap); static int shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode); static int shmget_existing(struct thread *td, struct shmget_args *uap, int mode, int segnum); /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *shmcalls[] = { (sy_call_t *)shmat, (sy_call_t *)oshmctl, (sy_call_t *)shmdt, (sy_call_t *)shmget, (sy_call_t *)shmctl }; #define SHMSEG_FREE 0x0200 #define SHMSEG_REMOVED 0x0400 #define SHMSEG_ALLOCATED 0x0800 #define SHMSEG_WANTED 0x1000 static int shm_last_free, shm_nused, shm_committed, shmalloced; static struct shmid_ds *shmsegs; struct shm_handle { /* vm_offset_t kva; */ vm_object_t shm_object; }; struct shmmap_state { vm_offset_t va; int shmid; }; static void shm_deallocate_segment(struct shmid_ds *); static int shm_find_segment_by_key(key_t); static struct shmid_ds *shm_find_segment_by_shmid(int); static struct shmid_ds *shm_find_segment_by_shmidx(int); static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *); static void shmrealloc(void); static void shminit(void); static int sysvshm_modload(struct module *, int, void *); static int shmunload(void); static void shmexit_myhook(struct vmspace *vm); static void shmfork_myhook(struct proc *p1, struct proc *p2); static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS); /* * Tuneable values. */ #ifndef SHMMAXPGS #define SHMMAXPGS 8192 /* Note: sysv shared memory is swap backed. 
*/ #endif #ifndef SHMMAX #define SHMMAX (SHMMAXPGS*PAGE_SIZE) #endif #ifndef SHMMIN #define SHMMIN 1 #endif #ifndef SHMMNI #define SHMMNI 192 #endif #ifndef SHMSEG #define SHMSEG 128 #endif #ifndef SHMALL #define SHMALL (SHMMAXPGS) #endif struct shminfo shminfo = { SHMMAX, SHMMIN, SHMMNI, SHMSEG, SHMALL }; static int shm_use_phys; static int shm_allow_removed; SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0, ""); SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW, &shm_allow_removed, 0, ""); SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLFLAG_RD, NULL, 0, sysctl_shmsegs, "", ""); static int shm_find_segment_by_key(key) key_t key; { int i; for (i = 0; i < shmalloced; i++) if ((shmsegs[i].shm_perm.mode & SHMSEG_ALLOCATED) && shmsegs[i].shm_perm.key == key) return (i); return (-1); } static struct shmid_ds * shm_find_segment_by_shmid(int shmid) { int segnum; struct shmid_ds *shmseg; segnum = IPCID_TO_IX(shmid); if (segnum < 0 || segnum >= shmalloced) return (NULL); shmseg = &shmsegs[segnum]; if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0 || (!shm_allow_removed && (shmseg->shm_perm.mode & SHMSEG_REMOVED) != 0) || shmseg->shm_perm.seq != IPCID_TO_SEQ(shmid)) return (NULL); return (shmseg); } static struct shmid_ds * shm_find_segment_by_shmidx(int segnum) { struct shmid_ds *shmseg; if (segnum < 0 || segnum >= shmalloced) return (NULL); shmseg = &shmsegs[segnum]; if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0 || (!shm_allow_removed && (shmseg->shm_perm.mode & SHMSEG_REMOVED) != 0)) return (NULL); return (shmseg); } static void shm_deallocate_segment(shmseg) struct shmid_ds *shmseg; { struct shm_handle *shm_handle; size_t size; GIANT_REQUIRED; shm_handle = shmseg->shm_internal; vm_object_deallocate(shm_handle->shm_object); free(shm_handle, M_SHM); shmseg->shm_internal = NULL; size = round_page(shmseg->shm_segsz); shm_committed -= btoc(size); shm_nused--; shmseg->shm_perm.mode = SHMSEG_FREE; } static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s) { struct shmid_ds *shmseg; int segnum, result; size_t size; GIANT_REQUIRED; segnum = IPCID_TO_IX(shmmap_s->shmid); shmseg = &shmsegs[segnum]; size = round_page(shmseg->shm_segsz); result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size); if (result != KERN_SUCCESS) return (EINVAL); shmmap_s->shmid = -1; shmseg->shm_dtime = time_second; if ((--shmseg->shm_nattch <= 0) && (shmseg->shm_perm.mode & SHMSEG_REMOVED)) { shm_deallocate_segment(shmseg); shm_last_free = segnum; } return (0); } #ifndef _SYS_SYSPROTO_H_ struct shmdt_args { const void *shmaddr; }; #endif /* * MPSAFE */ int shmdt(td, uap) struct thread *td; struct shmdt_args *uap; { struct proc *p = td->td_proc; struct shmmap_state *shmmap_s; int i; int error = 0; if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&Giant); shmmap_s = p->p_vmspace->vm_shm; if (shmmap_s == NULL) { error = EINVAL; goto done2; } for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { if (shmmap_s->shmid != -1 && shmmap_s->va == (vm_offset_t)uap->shmaddr) { break; } } if (i == shminfo.shmseg) { error = EINVAL; 
goto done2; } error = shm_delete_mapping(p->p_vmspace, shmmap_s); done2: mtx_unlock(&Giant); return (error); } #ifndef _SYS_SYSPROTO_H_ struct shmat_args { int shmid; const void *shmaddr; int shmflg; }; #endif /* * MPSAFE */ int kern_shmat(td, shmid, shmaddr, shmflg) struct thread *td; int shmid; const void *shmaddr; int shmflg; { struct proc *p = td->td_proc; int i, flags; struct shmid_ds *shmseg; struct shmmap_state *shmmap_s = NULL; struct shm_handle *shm_handle; vm_offset_t attach_va; vm_prot_t prot; vm_size_t size; int rv; int error = 0; if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&Giant); shmmap_s = p->p_vmspace->vm_shm; if (shmmap_s == NULL) { size = shminfo.shmseg * sizeof(struct shmmap_state); shmmap_s = malloc(size, M_SHM, M_WAITOK); for (i = 0; i < shminfo.shmseg; i++) shmmap_s[i].shmid = -1; p->p_vmspace->vm_shm = shmmap_s; } shmseg = shm_find_segment_by_shmid(shmid); if (shmseg == NULL) { error = EINVAL; goto done2; } error = ipcperm(td, &shmseg->shm_perm, (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); if (error) goto done2; for (i = 0; i < shminfo.shmseg; i++) { if (shmmap_s->shmid == -1) break; shmmap_s++; } if (i >= shminfo.shmseg) { error = EMFILE; goto done2; } size = round_page(shmseg->shm_segsz); #ifdef VM_PROT_READ_IS_EXEC prot = VM_PROT_READ | VM_PROT_EXECUTE; #else prot = VM_PROT_READ; #endif if ((shmflg & SHM_RDONLY) == 0) prot |= VM_PROT_WRITE; flags = MAP_ANON | MAP_SHARED; if (shmaddr) { flags |= MAP_FIXED; if (shmflg & SHM_RND) { attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1); } else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) { attach_va = (vm_offset_t)shmaddr; } else { error = EINVAL; goto done2; } } else { /* * This is just a hint to vm_map_find() about where to * put it. */ attach_va = round_page((vm_offset_t)p->p_vmspace->vm_taddr + maxtsiz + maxdsiz); } shm_handle = shmseg->shm_internal; vm_object_reference(shm_handle->shm_object); rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object, 0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0); if (rv != KERN_SUCCESS) { error = ENOMEM; goto done2; } vm_map_inherit(&p->p_vmspace->vm_map, attach_va, attach_va + size, VM_INHERIT_SHARE); shmmap_s->va = attach_va; shmmap_s->shmid = shmid; shmseg->shm_lpid = p->p_pid; shmseg->shm_atime = time_second; shmseg->shm_nattch++; td->td_retval[0] = attach_va; done2: mtx_unlock(&Giant); return (error); } -int +int shmat(td, uap) struct thread *td; struct shmat_args *uap; { return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg); } struct oshmid_ds { struct ipc_perm shm_perm; /* operation perms */ int shm_segsz; /* size of segment (bytes) */ u_short shm_cpid; /* pid, creator */ u_short shm_lpid; /* pid, last operation */ short shm_nattch; /* no. 
of current attaches */ time_t shm_atime; /* last attach time */ time_t shm_dtime; /* last detach time */ time_t shm_ctime; /* last change time */ void *shm_handle; /* internal handle for shm segment */ }; struct oshmctl_args { int shmid; int cmd; struct oshmid_ds *ubuf; }; /* * MPSAFE */ static int oshmctl(td, uap) struct thread *td; struct oshmctl_args *uap; { #ifdef COMPAT_43 int error = 0; struct shmid_ds *shmseg; struct oshmid_ds outbuf; if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&Giant); shmseg = shm_find_segment_by_shmid(uap->shmid); if (shmseg == NULL) { error = EINVAL; goto done2; } switch (uap->cmd) { case IPC_STAT: error = ipcperm(td, &shmseg->shm_perm, IPC_R); if (error) goto done2; outbuf.shm_perm = shmseg->shm_perm; outbuf.shm_segsz = shmseg->shm_segsz; outbuf.shm_cpid = shmseg->shm_cpid; outbuf.shm_lpid = shmseg->shm_lpid; outbuf.shm_nattch = shmseg->shm_nattch; outbuf.shm_atime = shmseg->shm_atime; outbuf.shm_dtime = shmseg->shm_dtime; outbuf.shm_ctime = shmseg->shm_ctime; outbuf.shm_handle = shmseg->shm_internal; error = copyout(&outbuf, uap->ubuf, sizeof(outbuf)); if (error) goto done2; break; default: /* XXX casting to (sy_call_t *) is bogus, as usual. */ error = ((sy_call_t *)shmctl)(td, uap); break; } done2: mtx_unlock(&Giant); return (error); #else return (EINVAL); #endif } #ifndef _SYS_SYSPROTO_H_ struct shmctl_args { int shmid; int cmd; struct shmid_ds *buf; }; #endif /* * MPSAFE */ int kern_shmctl(td, shmid, cmd, buf, bufsz) struct thread *td; int shmid; int cmd; void *buf; size_t *bufsz; { int error = 0; struct shmid_ds *shmseg; if (!jail_sysvipc_allowed && jailed(td->td_ucred)) return (ENOSYS); mtx_lock(&Giant); switch (cmd) { case IPC_INFO: memcpy(buf, &shminfo, sizeof(shminfo)); if (bufsz) *bufsz = sizeof(shminfo); td->td_retval[0] = shmalloced; goto done2; case SHM_INFO: { struct shm_info shm_info; shm_info.used_ids = shm_nused; shm_info.shm_rss = 0; /*XXX where to get from ? */ shm_info.shm_tot = 0; /*XXX where to get from ? */ shm_info.shm_swp = 0; /*XXX where to get from ? */ shm_info.swap_attempts = 0; /*XXX where to get from ? */ shm_info.swap_successes = 0; /*XXX where to get from ? 
*/ memcpy(buf, &shm_info, sizeof(shm_info)); if (bufsz) *bufsz = sizeof(shm_info); td->td_retval[0] = shmalloced; goto done2; } } if (cmd == SHM_STAT) shmseg = shm_find_segment_by_shmidx(shmid); else shmseg = shm_find_segment_by_shmid(shmid); if (shmseg == NULL) { error = EINVAL; goto done2; } switch (cmd) { case SHM_STAT: case IPC_STAT: error = ipcperm(td, &shmseg->shm_perm, IPC_R); if (error) goto done2; memcpy(buf, shmseg, sizeof(struct shmid_ds)); if (bufsz) *bufsz = sizeof(struct shmid_ds); if (cmd == SHM_STAT) td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->shm_perm); break; case IPC_SET: { struct shmid_ds *shmid; shmid = (struct shmid_ds *)buf; error = ipcperm(td, &shmseg->shm_perm, IPC_M); if (error) goto done2; shmseg->shm_perm.uid = shmid->shm_perm.uid; shmseg->shm_perm.gid = shmid->shm_perm.gid; shmseg->shm_perm.mode = (shmseg->shm_perm.mode & ~ACCESSPERMS) | (shmid->shm_perm.mode & ACCESSPERMS); shmseg->shm_ctime = time_second; break; } case IPC_RMID: error = ipcperm(td, &shmseg->shm_perm, IPC_M); if (error) goto done2; shmseg->shm_perm.key = IPC_PRIVATE; shmseg->shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->shm_nattch <= 0) { shm_deallocate_segment(shmseg); shm_last_free = IPCID_TO_IX(shmid); } break; #if 0 case SHM_LOCK: case SHM_UNLOCK: #endif default: error = EINVAL; break; } done2: mtx_unlock(&Giant); return (error); } int shmctl(td, uap) struct thread *td; struct shmctl_args *uap; { int error = 0; struct shmid_ds buf; size_t bufsz; /* IPC_SET needs to copyin the buffer before calling kern_shmctl */ if (uap->cmd == IPC_SET) { if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds)))) goto done; } error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz); if (error) goto done; /* Cases in which we need to copyout */ switch (uap->cmd) { case IPC_INFO: case SHM_INFO: case SHM_STAT: case IPC_STAT: error = copyout(&buf, uap->buf, bufsz); break; } done: if (error) { /* Invalidate the return value */ td->td_retval[0] = -1; } return (error); } #ifndef _SYS_SYSPROTO_H_ struct shmget_args { key_t key; size_t size; int shmflg; }; #endif static int shmget_existing(td, uap, mode, segnum) struct thread *td; struct shmget_args *uap; int mode; int segnum; { struct shmid_ds *shmseg; int error; shmseg = &shmsegs[segnum]; if (shmseg->shm_perm.mode & SHMSEG_REMOVED) { /* * This segment is in the process of being allocated. Wait * until it's done, and look the key up again (in case the * allocation failed or it was freed). */ shmseg->shm_perm.mode |= SHMSEG_WANTED; error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0); if (error) return (error); return (EAGAIN); } if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) return (EEXIST); error = ipcperm(td, &shmseg->shm_perm, mode); if (error) return (error); if (uap->size && uap->size > shmseg->shm_segsz) return (EINVAL); td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm); return (0); } static int shmget_allocate_segment(td, uap, mode) struct thread *td; struct shmget_args *uap; int mode; { int i, segnum, shmid, size; struct ucred *cred = td->td_ucred; struct shmid_ds *shmseg; struct shm_handle *shm_handle; GIANT_REQUIRED; if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax) return (EINVAL); if (shm_nused >= shminfo.shmmni) /* Any shmids left? */ return (ENOSPC); size = round_page(uap->size); if (shm_committed + btoc(size) > shminfo.shmall) return (ENOMEM); if (shm_last_free < 0) { shmrealloc(); /* Maybe expand the shmsegs[] array. 
 */
		for (i = 0; i < shmalloced; i++)
			if (shmsegs[i].shm_perm.mode & SHMSEG_FREE)
				break;
		if (i == shmalloced)
			return (ENOSPC);
		segnum = i;
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}
	shmseg = &shmsegs[segnum];
	/*
	 * In case we sleep in malloc(), mark the segment present but deleted
	 * so that no one else tries to create the same key.
	 */
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shmseg->shm_perm.key = uap->key;
	shmseg->shm_perm.seq = (shmseg->shm_perm.seq + 1) & 0x7fff;
	shm_handle = (struct shm_handle *)
	    malloc(sizeof(struct shm_handle), M_SHM, M_WAITOK);
	shmid = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	/*
	 * We make sure that we have allocated a pager before we need
	 * to.
	 */
	if (shm_use_phys) {
		shm_handle->shm_object =
		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
	} else {
		shm_handle->shm_object =
		    vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
	}
	VM_OBJECT_LOCK(shm_handle->shm_object);
	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
	VM_OBJECT_UNLOCK(shm_handle->shm_object);
	shmseg->shm_internal = shm_handle;
	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = cred->cr_gid;
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
	shmseg->shm_segsz = uap->size;
	shmseg->shm_cpid = td->td_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;
	shm_committed += btoc(size);
	shm_nused++;
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		/*
		 * Somebody else wanted this key while we were asleep.  Wake
		 * them up now.
		 */
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		wakeup(shmseg);
	}
	td->td_retval[0] = shmid;
	return (0);
}

/*
 * MPSAFE
 */
int
shmget(td, uap)
	struct thread *td;
	struct shmget_args *uap;
{
	int segnum, mode;
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	mtx_lock(&Giant);
	mode = uap->shmflg & ACCESSPERMS;
	if (uap->key != IPC_PRIVATE) {
	again:
		segnum = shm_find_segment_by_key(uap->key);
		if (segnum >= 0) {
			error = shmget_existing(td, uap, mode, segnum);
			if (error == EAGAIN)
				goto again;
			goto done2;
		}
		if ((uap->shmflg & IPC_CREAT) == 0) {
			error = ENOENT;
			goto done2;
		}
	}
	error = shmget_allocate_segment(td, uap, mode);
done2:
	mtx_unlock(&Giant);
	return (error);
}

/*
 * MPSAFE
 */
int
shmsys(td, uap)
	struct thread *td;	/* XXX actually varargs.
 */
	struct shmsys_args /* {
		int	which;
		int	a2;
		int	a3;
		int	a4;
	} */ *uap;
{
	int error;

	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
		return (ENOSYS);
	if (uap->which < 0 ||
	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
		return (EINVAL);
	mtx_lock(&Giant);
	error = (*shmcalls[uap->which])(td, &uap->a2);
	mtx_unlock(&Giant);
	return (error);
}

static void
shmfork_myhook(p1, p2)
	struct proc *p1, *p2;
{
	struct shmmap_state *shmmap_s;
	size_t size;
	int i;

	size = shminfo.shmseg * sizeof(struct shmmap_state);
	shmmap_s = malloc(size, M_SHM, M_WAITOK);
	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
	p2->p_vmspace->vm_shm = shmmap_s;
	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
		if (shmmap_s->shmid != -1)
			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].shm_nattch++;
}

static void
shmexit_myhook(struct vmspace *vm)
{
	struct shmmap_state *base, *shm;
	int i;

	GIANT_REQUIRED;

	if ((base = vm->vm_shm) != NULL) {
		vm->vm_shm = NULL;
		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
			if (shm->shmid != -1)
				shm_delete_mapping(vm, shm);
		}
		free(base, M_SHM);
	}
}

static void
shmrealloc(void)
{
	int i;
	struct shmid_ds *newsegs;

	if (shmalloced >= shminfo.shmmni)
		return;

	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
	if (newsegs == NULL)
		return;
	for (i = 0; i < shmalloced; i++)
		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
	for (; i < shminfo.shmmni; i++) {
		/* Mark the newly added entries free in the new array. */
		newsegs[i].shm_perm.mode = SHMSEG_FREE;
		newsegs[i].shm_perm.seq = 0;
	}
	free(shmsegs, M_SHM);
	shmsegs = newsegs;
	shmalloced = shminfo.shmmni;
}

static void
shminit()
{
	int i;

	TUNABLE_INT_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall);
	/* Derive shmmax from shmall, backing the multiplier off on overflow. */
	for (i = PAGE_SIZE; i > 0; i--) {
		shminfo.shmmax = shminfo.shmall * i;
		if (shminfo.shmmax >= shminfo.shmall)
			break;
	}
	TUNABLE_INT_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
	TUNABLE_INT_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
	TUNABLE_INT_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);

	shmalloced = shminfo.shmmni;
	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
	if (shmsegs == NULL)
		panic("cannot allocate initial memory for sysvshm");
	for (i = 0; i < shmalloced; i++) {
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm.seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shmexit_hook = &shmexit_myhook;
	shmfork_hook = &shmfork_myhook;
}

static int
shmunload()
{

	if (shm_nused > 0)
		return (EBUSY);

	free(shmsegs, M_SHM);
	shmexit_hook = NULL;
	shmfork_hook = NULL;
	return (0);
}

static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{

	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
}

static int
sysvshm_modload(struct module *module, int cmd, void *arg)
{
	int error = 0;

	switch (cmd) {
	case MOD_LOAD:
		shminit();
		break;
	case MOD_UNLOAD:
		error = shmunload();
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

static moduledata_t sysvshm_mod = {
	"sysvshm",
	&sysvshm_modload,
	NULL
};

SYSCALL_MODULE_HELPER(shmsys);
SYSCALL_MODULE_HELPER(shmat);
SYSCALL_MODULE_HELPER(shmctl);
SYSCALL_MODULE_HELPER(shmdt);
SYSCALL_MODULE_HELPER(shmget);

DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
MODULE_VERSION(sysvshm, 1);
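For reference, the handlers above implement the ordinary SysV shared-memory API as seen from userland. Below is a minimal, self-contained userland sketch, not part of the kernel source above; the key 0x1234, the 4 KB size, and the 0600 mode are arbitrary illustrative choices. It exercises the same create/attach/detach/remove paths as shmget_allocate_segment(), kern_shmat(), shm_delete_mapping(), and kern_shmctl(IPC_RMID):

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>

#include <err.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char *addr;
	int shmid;

	/* Create a fresh 4 KB segment; fail if the key is already in use. */
	shmid = shmget(0x1234, 4096, IPC_CREAT | IPC_EXCL | 0600);
	if (shmid == -1)
		err(1, "shmget");

	/* NULL shmaddr: let kern_shmat() pick the attach address. */
	addr = shmat(shmid, NULL, 0);
	if (addr == (char *)-1)
		err(1, "shmat");

	strlcpy(addr, "hello from SysV shm", 4096);
	printf("%s\n", addr);

	if (shmdt(addr) == -1)		/* the shm_delete_mapping() path */
		err(1, "shmdt");
	if (shmctl(shmid, IPC_RMID, NULL) == -1)
		err(1, "shmctl");
	return (0);
}

Note that IPC_RMID only sets SHMSEG_REMOVED; the backing vm object is released when shm_nattch drops to zero in shm_delete_mapping(), and whether such a removed id can still be looked up afterwards is governed by the kern.ipc.shm_allow_removed sysctl declared above.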
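Similarly, the semexit fragment at the top of this section is the kernel half of SEM_UNDO processing: each entry in the exiting process's undo vector is replayed against its semaphore, with negative adjustments clamped at zero. A hedged userland sketch of how such an undo record comes to exist; the key 0x5678 and mode are again arbitrary illustrative choices:

#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

#include <err.h>

int
main(void)
{
	struct sembuf op;
	int semid;

	/* One semaphore, created if it does not already exist. */
	semid = semget(0x5678, 1, IPC_CREAT | 0600);
	if (semid == -1)
		err(1, "semget");

	/*
	 * Bump the semaphore with SEM_UNDO.  The kernel records the
	 * opposite adjustment (-1) in this process's undo vector.
	 */
	op.sem_num = 0;
	op.sem_op = 1;
	op.sem_flg = SEM_UNDO;
	if (semop(semid, &op, 1) == -1)
		err(1, "semop");

	/* Exit without undoing the operation ourselves. */
	return (0);
}

When this process exits, semexit finds the -1 adjustment and applies it, so the semaphore value is restored rather than leaked; a process that dies while holding a SEM_UNDO lock releases it the same way.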