Index: head/sys/kern/sysv_msg.c =================================================================== --- head/sys/kern/sysv_msg.c (revision 316184) +++ head/sys/kern/sysv_msg.c (revision 316185) @@ -1,1881 +1,1884 @@ /*- * Implementation of SVID messages * * Author: Daniel Boulet * * Copyright 1993 Daniel Boulet and RTMX Inc. * * This system call was implemented by Daniel Boulet under contract from RTMX. * * Redistribution and use in source forms, with and without modification, * are permitted provided that this entire comment appears intact. * * Redistribution in binary form may occur without any restrictions. * Obviously, it would be nice if you gave credit where credit is due * but requiring it would be too onerous. * * This software is provided ``AS IS'' without any warranties of any kind. */ /*- * Copyright (c) 2003-2005 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project in part by McAfee * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research * program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(sysv_msg, "System V message queues support"); static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues"); static int msginit(void); static int msgunload(void); static int sysvmsg_modload(struct module *, int, void *); static void msq_remove(struct msqid_kernel *); static struct prison *msg_find_prison(struct ucred *); static int msq_prison_cansee(struct prison *, struct msqid_kernel *); static int msg_prison_check(void *, void *); static int msg_prison_set(void *, void *); static int msg_prison_get(void *, void *); static int msg_prison_remove(void *, void *); static void msg_prison_cleanup(struct prison *); #ifdef MSG_DEBUG #define DPRINTF(a) printf a #else #define DPRINTF(a) (void)0 #endif static void msg_freehdr(struct msg *msghdr); #ifndef MSGSSZ #define MSGSSZ 8 /* Each segment must be 2^N long */ #endif #ifndef MSGSEG #define MSGSEG 2048 /* must be less than 32767 */ #endif #define MSGMAX (MSGSSZ*MSGSEG) #ifndef MSGMNB #define MSGMNB 2048 /* max # of bytes in a queue */ #endif #ifndef MSGMNI #define MSGMNI 40 #endif #ifndef MSGTQL #define MSGTQL 40 #endif /* * Based on the configuration parameters described in an SVR2 (yes, two) * config(1m) man page. 
* * Each message is broken up and stored in segments that are msgssz bytes * long. For efficiency reasons, this should be a power of two. Also, * it doesn't make sense if it is less than 8 or greater than about 256. * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of * two between 8 and 1024 inclusive (and panic's if it isn't). */ struct msginfo msginfo = { MSGMAX, /* max chars in a message */ MSGMNI, /* # of message queue identifiers */ MSGMNB, /* max chars in a queue */ MSGTQL, /* max messages in system */ MSGSSZ, /* size of a message segment */ /* (must be small power of 2 greater than 4) */ MSGSEG /* number of message segments */ }; /* * macros to convert between msqid_ds's and msqid's. * (specific to this implementation) */ #define MSQID(ix,ds) ((ix) & 0xffff | (((ds).msg_perm.seq << 16) & 0xffff0000)) #define MSQID_IX(id) ((id) & 0xffff) #define MSQID_SEQ(id) (((id) >> 16) & 0xffff) /* * The rest of this file is specific to this particular implementation. */ struct msgmap { short next; /* next segment in buffer */ /* -1 -> available */ /* 0..(MSGSEG-1) -> index of next segment */ }; #define MSG_LOCKED 01000 /* Is this msqid_ds locked? */ static int nfree_msgmaps; /* # of free map entries */ static short free_msgmaps; /* head of linked list of free map entries */ static struct msg *free_msghdrs;/* list of free msg headers */ static char *msgpool; /* MSGMAX byte long msg buffer pool */ static struct msgmap *msgmaps; /* MSGSEG msgmap structures */ static struct msg *msghdrs; /* MSGTQL msg headers */ static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel struct's */ static struct mtx msq_mtx; /* global mutex for message queues. 
*/ static unsigned msg_prison_slot;/* prison OSD slot */ static struct syscall_helper_data msg_syscalls[] = { SYSCALL_INIT_HELPER(msgctl), SYSCALL_INIT_HELPER(msgget), SYSCALL_INIT_HELPER(msgsnd), SYSCALL_INIT_HELPER(msgrcv), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL_INIT_HELPER(msgsys), SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl), #endif SYSCALL_INIT_LAST }; #ifdef COMPAT_FREEBSD32 #include #include #include #include #include #include static struct syscall_helper_data msg32_syscalls[] = { SYSCALL32_INIT_HELPER(freebsd32_msgctl), SYSCALL32_INIT_HELPER(freebsd32_msgsnd), SYSCALL32_INIT_HELPER(freebsd32_msgrcv), SYSCALL32_INIT_HELPER_COMPAT(msgget), SYSCALL32_INIT_HELPER(freebsd32_msgsys), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl), #endif SYSCALL_INIT_LAST }; #endif static int msginit() { struct prison *pr; void **rsv; int i, error; osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_CHECK] = msg_prison_check, [PR_METHOD_SET] = msg_prison_set, [PR_METHOD_GET] = msg_prison_get, [PR_METHOD_REMOVE] = msg_prison_remove, }; msginfo.msgmax = msginfo.msgseg * msginfo.msgssz; msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK); msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK); msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK); msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG, M_WAITOK); /* * msginfo.msgssz should be a power of two for efficiency reasons. * It is also pretty silly if msginfo.msgssz is less than 8 * or greater than about 256 so ... 
*/ i = 8; while (i < 1024 && i != msginfo.msgssz) i <<= 1; if (i != msginfo.msgssz) { DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz, msginfo.msgssz)); panic("msginfo.msgssz not a small power of 2"); } if (msginfo.msgseg > 32767) { DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg)); panic("msginfo.msgseg > 32767"); } for (i = 0; i < msginfo.msgseg; i++) { if (i > 0) msgmaps[i-1].next = i; msgmaps[i].next = -1; /* implies entry is available */ } free_msgmaps = 0; nfree_msgmaps = msginfo.msgseg; for (i = 0; i < msginfo.msgtql; i++) { msghdrs[i].msg_type = 0; if (i > 0) msghdrs[i-1].msg_next = &msghdrs[i]; msghdrs[i].msg_next = NULL; #ifdef MAC mac_sysvmsg_init(&msghdrs[i]); #endif } free_msghdrs = &msghdrs[0]; for (i = 0; i < msginfo.msgmni; i++) { msqids[i].u.msg_qbytes = 0; /* implies entry is available */ msqids[i].u.msg_perm.seq = 0; /* reset to a known value */ msqids[i].u.msg_perm.mode = 0; #ifdef MAC mac_sysvmsq_init(&msqids[i]); #endif } mtx_init(&msq_mtx, "msq", NULL, MTX_DEF); /* Set current prisons according to their allow.sysvipc. 
*/ msg_prison_slot = osd_jail_register(NULL, methods); rsv = osd_reserve(msg_prison_slot); prison_lock(&prison0); (void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0); prison_unlock(&prison0); rsv = NULL; sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { if (rsv == NULL) rsv = osd_reserve(msg_prison_slot); prison_lock(pr); if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, &prison0); rsv = NULL; } prison_unlock(pr); } if (rsv != NULL) osd_free_reserved(rsv); sx_sunlock(&allprison_lock); error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 error = syscall32_helper_register(msg32_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #endif return (0); } static int msgunload() { struct msqid_kernel *msqkptr; int msqid; #ifdef MAC int i; #endif syscall_helper_unregister(msg_syscalls); #ifdef COMPAT_FREEBSD32 syscall32_helper_unregister(msg32_syscalls); #endif for (msqid = 0; msqid < msginfo.msgmni; msqid++) { msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes != 0 || (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) break; } if (msqid != msginfo.msgmni) return (EBUSY); if (msg_prison_slot != 0) osd_jail_deregister(msg_prison_slot); #ifdef MAC for (i = 0; i < msginfo.msgtql; i++) mac_sysvmsg_destroy(&msghdrs[i]); for (msqid = 0; msqid < msginfo.msgmni; msqid++) mac_sysvmsq_destroy(&msqids[msqid]); #endif free(msgpool, M_MSG); free(msgmaps, M_MSG); free(msghdrs, M_MSG); free(msqids, M_MSG); mtx_destroy(&msq_mtx); return (0); } static int sysvmsg_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: error = msginit(); if (error != 0) msgunload(); break; case MOD_UNLOAD: error = msgunload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } static moduledata_t sysvmsg_mod = { "sysvmsg", &sysvmsg_modload, NULL }; 
DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST); MODULE_VERSION(sysvmsg, 1); static void msg_freehdr(msghdr) struct msg *msghdr; { while (msghdr->msg_ts > 0) { short next; if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg) panic("msghdr->msg_spot out of range"); next = msgmaps[msghdr->msg_spot].next; msgmaps[msghdr->msg_spot].next = free_msgmaps; free_msgmaps = msghdr->msg_spot; nfree_msgmaps++; msghdr->msg_spot = next; if (msghdr->msg_ts >= msginfo.msgssz) msghdr->msg_ts -= msginfo.msgssz; else msghdr->msg_ts = 0; } if (msghdr->msg_spot != -1) panic("msghdr->msg_spot != -1"); msghdr->msg_next = free_msghdrs; free_msghdrs = msghdr; #ifdef MAC mac_sysvmsg_cleanup(msghdr); #endif } static void msq_remove(struct msqid_kernel *msqkptr) { struct msg *msghdr; racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1); racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum); racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes); crfree(msqkptr->cred); msqkptr->cred = NULL; /* Free the message headers */ msghdr = msqkptr->u.msg_first; while (msghdr != NULL) { struct msg *msghdr_tmp; /* Free the segments of each message */ msqkptr->u.msg_cbytes -= msghdr->msg_ts; msqkptr->u.msg_qnum--; msghdr_tmp = msghdr; msghdr = msghdr->msg_next; msg_freehdr(msghdr_tmp); } if (msqkptr->u.msg_cbytes != 0) panic("msg_cbytes is screwed up"); if (msqkptr->u.msg_qnum != 0) panic("msg_qnum is screwed up"); msqkptr->u.msg_qbytes = 0; /* Mark it as free */ #ifdef MAC mac_sysvmsq_cleanup(msqkptr); #endif wakeup(msqkptr); } static struct prison * msg_find_prison(struct ucred *cred) { struct prison *pr, *rpr; pr = cred->cr_prison; prison_lock(pr); rpr = osd_jail_get(pr, msg_prison_slot); prison_unlock(pr); return rpr; } static int msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr) { if (msqkptr->cred == NULL || !(rpr == msqkptr->cred->cr_prison || prison_ischild(rpr, msqkptr->cred->cr_prison))) return (EINVAL); return (0); } #ifndef 
_SYS_SYSPROTO_H_ struct msgctl_args { int msqid; int cmd; struct msqid_ds *buf; }; #endif int sys_msgctl(td, uap) struct thread *td; register struct msgctl_args *uap; { int msqid = uap->msqid; int cmd = uap->cmd; struct msqid_ds msqbuf; int error; DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf)); if (cmd == IPC_SET && (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0) return (error); error = kern_msgctl(td, msqid, cmd, &msqbuf); if (cmd == IPC_STAT && error == 0) error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds)); return (error); } int kern_msgctl(td, msqid, cmd, msqbuf) struct thread *td; int msqid; int cmd; struct msqid_ds *msqbuf; { int rval, error, msqix; register struct msqid_kernel *msqkptr; struct prison *rpr; rpr = msg_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); msqix = IPCID_TO_IX(msqid); if (msqix < 0 || msqix >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, msginfo.msgmni)); return (EINVAL); } msqkptr = &msqids[msqix]; mtx_lock(&msq_mtx); if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("no such msqid\n")); error = EINVAL; goto done2; } if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } error = msq_prison_cansee(rpr, msqkptr); if (error != 0) { DPRINTF(("requester can't see prison\n")); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd); if (error != 0) goto done2; #endif error = 0; rval = 0; switch (cmd) { case IPC_RMID: { #ifdef MAC struct msg *msghdr; #endif if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) goto done2; #ifdef MAC /* * Check that the thread has MAC access permissions to * individual msghdrs. Note: We need to do this in a * separate loop because the actual loop alters the * msq/msghdr info as it progresses, and there is no going * back if half the way through we discover that the * thread cannot free a certain msghdr. 
The msq will get * into an inconsistent state. */ for (msghdr = msqkptr->u.msg_first; msghdr != NULL; msghdr = msghdr->msg_next) { error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr); if (error != 0) goto done2; } #endif msq_remove(msqkptr); } break; case IPC_SET: if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M))) goto done2; if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) { error = priv_check(td, PRIV_IPC_MSGSIZE); if (error) goto done2; } if (msqbuf->msg_qbytes > msginfo.msgmnb) { DPRINTF(("can't increase msg_qbytes beyond %d" "(truncating)\n", msginfo.msgmnb)); msqbuf->msg_qbytes = msginfo.msgmnb; /* silently restrict qbytes to system limit */ } if (msqbuf->msg_qbytes == 0) { DPRINTF(("can't reduce msg_qbytes to 0\n")); error = EINVAL; /* non-standard errno! */ goto done2; } msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid; /* change the owner */ msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid; /* change the owner */ msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) | (msqbuf->msg_perm.mode & 0777); msqkptr->u.msg_qbytes = msqbuf->msg_qbytes; msqkptr->u.msg_ctime = time_second; break; case IPC_STAT: if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { DPRINTF(("requester doesn't have read access\n")); goto done2; } *msqbuf = msqkptr->u; if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison) msqbuf->msg_perm.key = IPC_PRIVATE; break; default: DPRINTF(("invalid command %d\n", cmd)); error = EINVAL; goto done2; } if (error == 0) td->td_retval[0] = rval; done2: mtx_unlock(&msq_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct msgget_args { key_t key; int msgflg; }; #endif int sys_msgget(td, uap) struct thread *td; register struct msgget_args *uap; { int msqid, error = 0; int key = uap->key; int msgflg = uap->msgflg; struct ucred *cred = td->td_ucred; register struct msqid_kernel *msqkptr = NULL; DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg)); if (msg_find_prison(cred) == NULL) return (ENOSYS); mtx_lock(&msq_mtx); if (key != IPC_PRIVATE) 
{ for (msqid = 0; msqid < msginfo.msgmni; msqid++) { msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes != 0 && msqkptr->cred != NULL && msqkptr->cred->cr_prison == cred->cr_prison && msqkptr->u.msg_perm.key == key) break; } if (msqid < msginfo.msgmni) { DPRINTF(("found public key\n")); if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) { DPRINTF(("not exclusive\n")); error = EEXIST; goto done2; } if ((error = ipcperm(td, &msqkptr->u.msg_perm, msgflg & 0700))) { DPRINTF(("requester doesn't have 0%o access\n", msgflg & 0700)); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqget(cred, msqkptr); if (error != 0) goto done2; #endif goto found; } } DPRINTF(("need to allocate the msqid_ds\n")); if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) { for (msqid = 0; msqid < msginfo.msgmni; msqid++) { /* * Look for an unallocated and unlocked msqid_ds. * msqid_ds's can be locked by msgsnd or msgrcv while * they are copying the message in/out. We can't * re-use the entry until they release it. */ msqkptr = &msqids[msqid]; if (msqkptr->u.msg_qbytes == 0 && (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0) break; } if (msqid == msginfo.msgmni) { DPRINTF(("no more msqid_ds's available\n")); error = ENOSPC; goto done2; } #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_NMSGQ, 1); PROC_UNLOCK(td->td_proc); if (error != 0) { error = ENOSPC; goto done2; } } #endif DPRINTF(("msqid %d is available\n", msqid)); msqkptr->u.msg_perm.key = key; msqkptr->u.msg_perm.cuid = cred->cr_uid; msqkptr->u.msg_perm.uid = cred->cr_uid; msqkptr->u.msg_perm.cgid = cred->cr_gid; msqkptr->u.msg_perm.gid = cred->cr_gid; msqkptr->u.msg_perm.mode = (msgflg & 0777); msqkptr->cred = crhold(cred); /* Make sure that the returned msqid is unique */ msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff; msqkptr->u.msg_first = NULL; msqkptr->u.msg_last = NULL; msqkptr->u.msg_cbytes = 0; msqkptr->u.msg_qnum = 0; msqkptr->u.msg_qbytes = msginfo.msgmnb; 
msqkptr->u.msg_lspid = 0; msqkptr->u.msg_lrpid = 0; msqkptr->u.msg_stime = 0; msqkptr->u.msg_rtime = 0; msqkptr->u.msg_ctime = time_second; #ifdef MAC mac_sysvmsq_create(cred, msqkptr); #endif } else { DPRINTF(("didn't find it and wasn't asked to create it\n")); error = ENOENT; goto done2; } found: /* Construct the unique msqid */ td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm); done2: mtx_unlock(&msq_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct msgsnd_args { int msqid; const void *msgp; size_t msgsz; int msgflg; }; #endif int kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype) struct thread *td; int msqid; const void *msgp; /* XXX msgp is actually mtext. */ size_t msgsz; int msgflg; long mtype; { int msqix, segs_needed, error = 0; register struct msqid_kernel *msqkptr; register struct msg *msghdr; struct prison *rpr; short next; #ifdef RACCT size_t saved_msgsz; #endif rpr = msg_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); mtx_lock(&msq_mtx); msqix = IPCID_TO_IX(msqid); if (msqix < 0 || msqix >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, msginfo.msgmni)); error = EINVAL; goto done2; } msqkptr = &msqids[msqix]; if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("no such message queue id\n")); error = EINVAL; goto done2; } if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } if ((error = msq_prison_cansee(rpr, msqkptr))) { DPRINTF(("requester can't see prison\n")); goto done2; } if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) { DPRINTF(("requester doesn't have write access\n")); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr); if (error != 0) goto done2; #endif #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) { PROC_UNLOCK(td->td_proc); error = EAGAIN; goto done2; } saved_msgsz = msgsz; if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) { 
racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); PROC_UNLOCK(td->td_proc); error = EAGAIN; goto done2; } PROC_UNLOCK(td->td_proc); } #endif segs_needed = howmany(msgsz, msginfo.msgssz); DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz, segs_needed)); for (;;) { int need_more_resources = 0; /* * check msgsz * (inside this loop in case msg_qbytes changes while we sleep) */ if (msgsz > msqkptr->u.msg_qbytes) { DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n")); error = EINVAL; goto done3; } if (msqkptr->u.msg_perm.mode & MSG_LOCKED) { DPRINTF(("msqid is locked\n")); need_more_resources = 1; } if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) { DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n")); need_more_resources = 1; } if (segs_needed > nfree_msgmaps) { DPRINTF(("segs_needed > nfree_msgmaps\n")); need_more_resources = 1; } if (free_msghdrs == NULL) { DPRINTF(("no more msghdrs\n")); need_more_resources = 1; } if (need_more_resources) { int we_own_it; if ((msgflg & IPC_NOWAIT) != 0) { DPRINTF(("need more resources but caller " "doesn't want to wait\n")); error = EAGAIN; goto done3; } if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) { DPRINTF(("we don't own the msqid_ds\n")); we_own_it = 0; } else { /* Force later arrivals to wait for our request */ DPRINTF(("we own the msqid_ds\n")); msqkptr->u.msg_perm.mode |= MSG_LOCKED; we_own_it = 1; } DPRINTF(("msgsnd: goodnight\n")); error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH, "msgsnd", hz); DPRINTF(("msgsnd: good morning, error=%d\n", error)); if (we_own_it) msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; if (error == EWOULDBLOCK) { DPRINTF(("msgsnd: timed out\n")); continue; } if (error != 0) { DPRINTF(("msgsnd: interrupted system call\n")); error = EINTR; goto done3; } /* * Make sure that the msq queue still exists */ if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("msqid deleted\n")); error = EIDRM; goto done3; } } else { DPRINTF(("got all the resources that we need\n")); break; } } /* * We have the 
resources that we need. * Make sure! */ if (msqkptr->u.msg_perm.mode & MSG_LOCKED) panic("msg_perm.mode & MSG_LOCKED"); if (segs_needed > nfree_msgmaps) panic("segs_needed > nfree_msgmaps"); if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) panic("msgsz + msg_cbytes > msg_qbytes"); if (free_msghdrs == NULL) panic("no more msghdrs"); /* * Re-lock the msqid_ds in case we page-fault when copying in the * message */ if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) panic("msqid_ds is already locked"); msqkptr->u.msg_perm.mode |= MSG_LOCKED; /* * Allocate a message header */ msghdr = free_msghdrs; free_msghdrs = msghdr->msg_next; msghdr->msg_spot = -1; msghdr->msg_ts = msgsz; msghdr->msg_type = mtype; #ifdef MAC /* * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here * immediately? Or, should it be checked just before the msg is * enqueued in the msgq (as it is done now)? */ mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr); #endif /* * Allocate space for the message */ while (segs_needed > 0) { if (nfree_msgmaps <= 0) panic("not enough msgmaps"); if (free_msgmaps == -1) panic("nil free_msgmaps"); next = free_msgmaps; if (next <= -1) panic("next too low #1"); if (next >= msginfo.msgseg) panic("next out of range #1"); DPRINTF(("allocating segment %d to message\n", next)); free_msgmaps = msgmaps[next].next; nfree_msgmaps--; msgmaps[next].next = msghdr->msg_spot; msghdr->msg_spot = next; segs_needed--; } /* * Validate the message type */ if (msghdr->msg_type < 1) { msg_freehdr(msghdr); msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; wakeup(msqkptr); DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type)); error = EINVAL; goto done3; } /* * Copy in the message body */ next = msghdr->msg_spot; while (msgsz > 0) { size_t tlen; if (msgsz > msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz; if (next <= -1) panic("next too low #2"); if (next >= msginfo.msgseg) panic("next out of range #2"); mtx_unlock(&msq_mtx); if ((error = copyin(msgp, &msgpool[next * 
msginfo.msgssz], tlen)) != 0) { mtx_lock(&msq_mtx); DPRINTF(("error %d copying in message segment\n", error)); msg_freehdr(msghdr); msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; wakeup(msqkptr); goto done3; } mtx_lock(&msq_mtx); msgsz -= tlen; msgp = (const char *)msgp + tlen; next = msgmaps[next].next; } if (next != -1) panic("didn't use all the msg segments"); /* * We've got the message. Unlock the msqid_ds. */ msqkptr->u.msg_perm.mode &= ~MSG_LOCKED; /* * Make sure that the msqid_ds is still allocated. */ if (msqkptr->u.msg_qbytes == 0) { msg_freehdr(msghdr); wakeup(msqkptr); error = EIDRM; goto done3; } #ifdef MAC /* * Note: Since the task/thread allocates the msghdr and usually * primes it with its own MAC label, for a majority of policies, it * won't be necessary to check whether the msghdr has access * permissions to the msgq. The mac_sysvmsq_check_msqsnd check would * suffice in that case. However, this hook may be required where * individual policies derive a non-identical label for the msghdr * from the current thread label and may want to check the msghdr * enqueue permissions, along with read/write permissions to the * msgq. 
*/ error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr); if (error != 0) { msg_freehdr(msghdr); wakeup(msqkptr); goto done3; } #endif /* * Put the message into the queue */ if (msqkptr->u.msg_first == NULL) { msqkptr->u.msg_first = msghdr; msqkptr->u.msg_last = msghdr; } else { msqkptr->u.msg_last->msg_next = msghdr; msqkptr->u.msg_last = msghdr; } msqkptr->u.msg_last->msg_next = NULL; msqkptr->u.msg_cbytes += msghdr->msg_ts; msqkptr->u.msg_qnum++; msqkptr->u.msg_lspid = td->td_proc->p_pid; msqkptr->u.msg_stime = time_second; wakeup(msqkptr); td->td_retval[0] = 0; done3: #ifdef RACCT if (racct_enable && error != 0) { PROC_LOCK(td->td_proc); racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1); racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz); PROC_UNLOCK(td->td_proc); } #endif done2: mtx_unlock(&msq_mtx); return (error); } int sys_msgsnd(td, uap) struct thread *td; register struct msgsnd_args *uap; { int error; long mtype; DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp, uap->msgsz, uap->msgflg)); if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) { DPRINTF(("error %d copying the message type\n", error)); return (error); } return (kern_msgsnd(td, uap->msqid, (const char *)uap->msgp + sizeof(mtype), uap->msgsz, uap->msgflg, mtype)); } #ifndef _SYS_SYSPROTO_H_ struct msgrcv_args { int msqid; void *msgp; size_t msgsz; long msgtyp; int msgflg; }; #endif int kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype) struct thread *td; int msqid; void *msgp; /* XXX msgp is actually mtext. 
*/ size_t msgsz; long msgtyp; int msgflg; long *mtype; { size_t len; register struct msqid_kernel *msqkptr; register struct msg *msghdr; struct prison *rpr; int msqix, error = 0; short next; rpr = msg_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); msqix = IPCID_TO_IX(msqid); if (msqix < 0 || msqix >= msginfo.msgmni) { DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix, msginfo.msgmni)); return (EINVAL); } msqkptr = &msqids[msqix]; mtx_lock(&msq_mtx); if (msqkptr->u.msg_qbytes == 0) { DPRINTF(("no such message queue id\n")); error = EINVAL; goto done2; } if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("wrong sequence number\n")); error = EINVAL; goto done2; } if ((error = msq_prison_cansee(rpr, msqkptr))) { DPRINTF(("requester can't see prison\n")); goto done2; } if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) { DPRINTF(("requester doesn't have read access\n")); goto done2; } #ifdef MAC error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr); if (error != 0) goto done2; #endif msghdr = NULL; while (msghdr == NULL) { if (msgtyp == 0) { msghdr = msqkptr->u.msg_first; if (msghdr != NULL) { if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { DPRINTF(("first message on the queue " "is too big (want %zu, got %d)\n", msgsz, msghdr->msg_ts)); error = E2BIG; goto done2; } #ifdef MAC error = mac_sysvmsq_check_msgrcv(td->td_ucred, msghdr); if (error != 0) goto done2; #endif if (msqkptr->u.msg_first == msqkptr->u.msg_last) { msqkptr->u.msg_first = NULL; msqkptr->u.msg_last = NULL; } else { msqkptr->u.msg_first = msghdr->msg_next; if (msqkptr->u.msg_first == NULL) panic("msg_first/last screwed up #1"); } } } else { struct msg *previous; struct msg **prev; previous = NULL; prev = &(msqkptr->u.msg_first); while ((msghdr = *prev) != NULL) { /* * Is this message's type an exact match or is * this message's type less than or equal to * the absolute value of a negative msgtyp? 
* Note that the second half of this test can * NEVER be true if msgtyp is positive since * msg_type is always positive! */ if (msgtyp == msghdr->msg_type || msghdr->msg_type <= -msgtyp) { DPRINTF(("found message type %ld, " "requested %ld\n", msghdr->msg_type, msgtyp)); if (msgsz < msghdr->msg_ts && (msgflg & MSG_NOERROR) == 0) { DPRINTF(("requested message " "on the queue is too big " "(want %zu, got %hu)\n", msgsz, msghdr->msg_ts)); error = E2BIG; goto done2; } #ifdef MAC error = mac_sysvmsq_check_msgrcv( td->td_ucred, msghdr); if (error != 0) goto done2; #endif *prev = msghdr->msg_next; if (msghdr == msqkptr->u.msg_last) { if (previous == NULL) { if (prev != &msqkptr->u.msg_first) panic("msg_first/last screwed up #2"); msqkptr->u.msg_first = NULL; msqkptr->u.msg_last = NULL; } else { if (prev == &msqkptr->u.msg_first) panic("msg_first/last screwed up #3"); msqkptr->u.msg_last = previous; } } break; } previous = msghdr; prev = &(msghdr->msg_next); } } /* * We've either extracted the msghdr for the appropriate * message or there isn't one. * If there is one then bail out of this loop. */ if (msghdr != NULL) break; /* * Hmph! No message found. Does the user want to wait? */ if ((msgflg & IPC_NOWAIT) != 0) { DPRINTF(("no appropriate message found (msgtyp=%ld)\n", msgtyp)); /* The SVID says to return ENOMSG. */ error = ENOMSG; goto done2; } /* * Wait for something to happen */ DPRINTF(("msgrcv: goodnight\n")); error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH, "msgrcv", 0); DPRINTF(("msgrcv: good morning (error=%d)\n", error)); if (error != 0) { DPRINTF(("msgrcv: interrupted system call\n")); error = EINTR; goto done2; } /* * Make sure that the msq queue still exists */ if (msqkptr->u.msg_qbytes == 0 || msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) { DPRINTF(("msqid deleted\n")); error = EIDRM; goto done2; } } /* * Return the message to the user. * * First, do the bookkeeping (before we risk being interrupted). 
*/ msqkptr->u.msg_cbytes -= msghdr->msg_ts; msqkptr->u.msg_qnum--; msqkptr->u.msg_lrpid = td->td_proc->p_pid; msqkptr->u.msg_rtime = time_second; racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1); racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts); /* * Make msgsz the actual amount that we'll be returning. * Note that this effectively truncates the message if it is too long * (since msgsz is never increased). */ DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz, msghdr->msg_ts)); if (msgsz > msghdr->msg_ts) msgsz = msghdr->msg_ts; *mtype = msghdr->msg_type; /* * Return the segments to the user */ next = msghdr->msg_spot; for (len = 0; len < msgsz; len += msginfo.msgssz) { size_t tlen; if (msgsz - len > msginfo.msgssz) tlen = msginfo.msgssz; else tlen = msgsz - len; if (next <= -1) panic("next too low #3"); if (next >= msginfo.msgseg) panic("next out of range #3"); mtx_unlock(&msq_mtx); error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen); mtx_lock(&msq_mtx); if (error != 0) { DPRINTF(("error (%d) copying out message segment\n", error)); msg_freehdr(msghdr); wakeup(msqkptr); goto done2; } msgp = (char *)msgp + tlen; next = msgmaps[next].next; } /* * Done, return the actual number of bytes copied out. 
*/ msg_freehdr(msghdr); wakeup(msqkptr); td->td_retval[0] = msgsz; done2: mtx_unlock(&msq_mtx); return (error); } int sys_msgrcv(td, uap) struct thread *td; register struct msgrcv_args *uap; { int error; long mtype; DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid, uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg)); if ((error = kern_msgrcv(td, uap->msqid, (char *)uap->msgp + sizeof(mtype), uap->msgsz, uap->msgtyp, uap->msgflg, &mtype)) != 0) return (error); if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0) DPRINTF(("error %d copying the message type\n", error)); return (error); } static int sysctl_msqids(SYSCTL_HANDLER_ARGS) { struct msqid_kernel tmsqk; struct prison *pr, *rpr; int error, i; pr = req->td->td_ucred->cr_prison; rpr = msg_find_prison(req->td->td_ucred); error = 0; for (i = 0; i < msginfo.msgmni; i++) { mtx_lock(&msq_mtx); if (msqids[i].u.msg_qbytes == 0 || rpr == NULL || msq_prison_cansee(rpr, &msqids[i]) != 0) bzero(&tmsqk, sizeof(tmsqk)); else { tmsqk = msqids[i]; if (tmsqk.cred->cr_prison != pr) tmsqk.u.msg_perm.key = IPC_PRIVATE; } mtx_unlock(&msq_mtx); error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk)); if (error != 0) break; } return (error); } SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "Maximum message size"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "Number of message queue identifiers"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0, "Maximum number of bytes in a queue"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0, "Maximum number of messages in the system"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "Size of a message segment"); SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "Number of message segments"); SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_msqids, "", "Message queue IDs"); 
static int msg_prison_check(void *obj, void *data) { struct prison *pr = obj; struct prison *prpr; struct vfsoptlist *opts = data; int error, jsys; /* * sysvmsg is a jailsys integer. * It must be "disable" if the parent jail is disabled. */ error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)); if (error != ENOENT) { if (error != 0) return (error); switch (jsys) { case JAIL_SYS_DISABLE: break; case JAIL_SYS_NEW: case JAIL_SYS_INHERIT: prison_lock(pr->pr_parent); prpr = osd_jail_get(pr->pr_parent, msg_prison_slot); prison_unlock(pr->pr_parent); if (prpr == NULL) return (EPERM); break; default: return (EINVAL); } } return (0); } static int msg_prison_set(void *obj, void *data) { struct prison *pr = obj; struct prison *tpr, *orpr, *nrpr, *trpr; struct vfsoptlist *opts = data; void *rsv; int jsys, descend; /* * sysvmsg controls which jail is the root of the associated msgs (this * jail or same as the parent), or if the feature is available at all. */ if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT) jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) ? JAIL_SYS_INHERIT : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) ? JAIL_SYS_DISABLE : -1; if (jsys == JAIL_SYS_DISABLE) { prison_lock(pr); orpr = osd_jail_get(pr, msg_prison_slot); if (orpr != NULL) osd_jail_del(pr, msg_prison_slot); prison_unlock(pr); if (orpr != NULL) { if (orpr == pr) msg_prison_cleanup(pr); /* Disable all child jails as well. 
*/ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, msg_prison_slot); if (trpr != NULL) { osd_jail_del(tpr, msg_prison_slot); prison_unlock(tpr); if (trpr == tpr) msg_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } else if (jsys != -1) { if (jsys == JAIL_SYS_NEW) nrpr = pr; else { prison_lock(pr->pr_parent); nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot); prison_unlock(pr->pr_parent); } rsv = osd_reserve(msg_prison_slot); prison_lock(pr); orpr = osd_jail_get(pr, msg_prison_slot); if (orpr != nrpr) (void)osd_jail_set_reserved(pr, msg_prison_slot, rsv, nrpr); else osd_free_reserved(rsv); prison_unlock(pr); if (orpr != nrpr) { if (orpr == pr) msg_prison_cleanup(pr); if (orpr != NULL) { /* Change child jails matching the old root, */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, msg_prison_slot); if (trpr == orpr) { (void)osd_jail_set(tpr, msg_prison_slot, nrpr); prison_unlock(tpr); if (trpr == tpr) msg_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } } return (0); } static int msg_prison_get(void *obj, void *data) { struct prison *pr = obj; struct prison *rpr; struct vfsoptlist *opts = data; int error, jsys; /* Set sysvmsg based on the jail's root prison. */ prison_lock(pr); rpr = osd_jail_get(pr, msg_prison_slot); prison_unlock(pr); jsys = rpr == NULL ? JAIL_SYS_DISABLE : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys)); if (error == ENOENT) error = 0; return (error); } static int msg_prison_remove(void *obj, void *data __unused) { struct prison *pr = obj; struct prison *rpr; prison_lock(pr); rpr = osd_jail_get(pr, msg_prison_slot); prison_unlock(pr); if (rpr == pr) msg_prison_cleanup(pr); return (0); } static void msg_prison_cleanup(struct prison *pr) { struct msqid_kernel *msqkptr; int i; /* Remove any msqs that belong to this jail. 
*/ mtx_lock(&msq_mtx); for (i = 0; i < msginfo.msgmni; i++) { msqkptr = &msqids[i]; if (msqkptr->u.msg_qbytes != 0 && msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr) msq_remove(msqkptr); } mtx_unlock(&msq_mtx); } SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues"); #ifdef COMPAT_FREEBSD32 int freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap) { #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) + AUDIT_ARG_SVIPC_WHICH(uap->which); switch (uap->which) { case 0: return (freebsd7_freebsd32_msgctl(td, (struct freebsd7_freebsd32_msgctl_args *)&uap->a2)); case 2: return (freebsd32_msgsnd(td, (struct freebsd32_msgsnd_args *)&uap->a2)); case 3: return (freebsd32_msgrcv(td, (struct freebsd32_msgrcv_args *)&uap->a2)); default: return (sys_msgsys(td, (struct msgsys_args *)uap)); } #else return (nosys(td, NULL)); #endif } #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) int freebsd7_freebsd32_msgctl(struct thread *td, struct freebsd7_freebsd32_msgctl_args *uap) { struct msqid_ds msqbuf; struct msqid_ds32_old msqbuf32; int error; if (uap->cmd == IPC_SET) { error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32)); if (error) return (error); freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm); PTRIN_CP(msqbuf32, msqbuf, msg_first); PTRIN_CP(msqbuf32, msqbuf, msg_last); CP(msqbuf32, msqbuf, msg_cbytes); CP(msqbuf32, msqbuf, msg_qnum); CP(msqbuf32, msqbuf, msg_qbytes); CP(msqbuf32, msqbuf, msg_lspid); CP(msqbuf32, msqbuf, msg_lrpid); CP(msqbuf32, msqbuf, msg_stime); CP(msqbuf32, msqbuf, msg_rtime); CP(msqbuf32, msqbuf, msg_ctime); } error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); if (error) return (error); if (uap->cmd == IPC_STAT) { bzero(&msqbuf32, sizeof(msqbuf32)); freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm); PTROUT_CP(msqbuf, msqbuf32, msg_first); PTROUT_CP(msqbuf, 
msqbuf32, msg_last); CP(msqbuf, msqbuf32, msg_cbytes); CP(msqbuf, msqbuf32, msg_qnum); CP(msqbuf, msqbuf32, msg_qbytes); CP(msqbuf, msqbuf32, msg_lspid); CP(msqbuf, msqbuf32, msg_lrpid); CP(msqbuf, msqbuf32, msg_stime); CP(msqbuf, msqbuf32, msg_rtime); CP(msqbuf, msqbuf32, msg_ctime); error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32)); } return (error); } #endif int freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap) { struct msqid_ds msqbuf; struct msqid_ds32 msqbuf32; int error; if (uap->cmd == IPC_SET) { error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32)); if (error) return (error); freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm); PTRIN_CP(msqbuf32, msqbuf, msg_first); PTRIN_CP(msqbuf32, msqbuf, msg_last); CP(msqbuf32, msqbuf, msg_cbytes); CP(msqbuf32, msqbuf, msg_qnum); CP(msqbuf32, msqbuf, msg_qbytes); CP(msqbuf32, msqbuf, msg_lspid); CP(msqbuf32, msqbuf, msg_lrpid); CP(msqbuf32, msqbuf, msg_stime); CP(msqbuf32, msqbuf, msg_rtime); CP(msqbuf32, msqbuf, msg_ctime); } error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf); if (error) return (error); if (uap->cmd == IPC_STAT) { freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm); PTROUT_CP(msqbuf, msqbuf32, msg_first); PTROUT_CP(msqbuf, msqbuf32, msg_last); CP(msqbuf, msqbuf32, msg_cbytes); CP(msqbuf, msqbuf32, msg_qnum); CP(msqbuf, msqbuf32, msg_qbytes); CP(msqbuf, msqbuf32, msg_lspid); CP(msqbuf, msqbuf32, msg_lrpid); CP(msqbuf, msqbuf32, msg_stime); CP(msqbuf, msqbuf32, msg_rtime); CP(msqbuf, msqbuf32, msg_ctime); error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32)); } return (error); } int freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap) { const void *msgp; long mtype; int32_t mtype32; int error; msgp = PTRIN(uap->msgp); if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0) return (error); mtype = mtype32; return (kern_msgsnd(td, uap->msqid, (const char *)msgp + sizeof(mtype32), uap->msgsz, uap->msgflg, mtype)); } int 
freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap) { void *msgp; long mtype; int32_t mtype32; int error; msgp = PTRIN(uap->msgp); if ((error = kern_msgrcv(td, uap->msqid, (char *)msgp + sizeof(mtype32), uap->msgsz, uap->msgtyp, uap->msgflg, &mtype)) != 0) return (error); mtype32 = (int32_t)mtype; return (copyout(&mtype32, msgp, sizeof(mtype32))); } #endif #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *msgcalls[] = { (sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget, (sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv }; /* * Entry point for all MSG calls. */ int sys_msgsys(td, uap) struct thread *td; /* XXX actually varargs. */ struct msgsys_args /* { int which; int a2; int a3; int a4; int a5; int a6; } */ *uap; { int error; + AUDIT_ARG_SVIPC_WHICH(uap->which); if (uap->which < 0 || uap->which >= nitems(msgcalls)) return (EINVAL); error = (*msgcalls[uap->which])(td, &uap->a2); return (error); } #ifndef CP #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) #endif #ifndef _SYS_SYSPROTO_H_ struct freebsd7_msgctl_args { int msqid; int cmd; struct msqid_ds_old *buf; }; #endif int freebsd7_msgctl(td, uap) struct thread *td; struct freebsd7_msgctl_args *uap; { struct msqid_ds_old msqold; struct msqid_ds msqbuf; int error; DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd, uap->buf)); if (uap->cmd == IPC_SET) { error = copyin(uap->buf, &msqold, sizeof(msqold)); if (error) return (error); ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm); CP(msqold, msqbuf, msg_first); CP(msqold, msqbuf, msg_last); CP(msqold, msqbuf, msg_cbytes); CP(msqold, msqbuf, msg_qnum); CP(msqold, msqbuf, msg_qbytes); CP(msqold, msqbuf, msg_lspid); CP(msqold, msqbuf, msg_lrpid); CP(msqold, msqbuf, msg_stime); CP(msqold, msqbuf, msg_rtime); CP(msqold, msqbuf, msg_ctime); } error = kern_msgctl(td, 
uap->msqid, uap->cmd, &msqbuf); if (error) return (error); if (uap->cmd == IPC_STAT) { bzero(&msqold, sizeof(msqold)); ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm); CP(msqbuf, msqold, msg_first); CP(msqbuf, msqold, msg_last); CP(msqbuf, msqold, msg_cbytes); CP(msqbuf, msqold, msg_qnum); CP(msqbuf, msqold, msg_qbytes); CP(msqbuf, msqold, msg_lspid); CP(msqbuf, msqold, msg_lrpid); CP(msqbuf, msqold, msg_stime); CP(msqbuf, msqold, msg_rtime); CP(msqbuf, msqold, msg_ctime); error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old)); } return (error); } #undef CP #endif /* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 || COMPAT_FREEBSD7 */ Index: head/sys/kern/sysv_sem.c =================================================================== --- head/sys/kern/sysv_sem.c (revision 316184) +++ head/sys/kern/sysv_sem.c (revision 316185) @@ -1,1952 +1,1955 @@ /*- * Implementation of SVID semaphores * * Author: Daniel Boulet * * This software is provided ``AS IS'' without any warranties of any kind. */ /*- * Copyright (c) 2003-2005 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project in part by McAfee * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research * program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(sysv_sem, "System V semaphores support"); static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); #ifdef SEM_DEBUG #define DPRINTF(a) printf a #else #define DPRINTF(a) #endif static int seminit(void); static int sysvsem_modload(struct module *, int, void *); static int semunload(void); static void semexit_myhook(void *arg, struct proc *p); static int sysctl_sema(SYSCTL_HANDLER_ARGS); static int semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr); static void sem_remove(int semidx, struct ucred *cred); static struct prison *sem_find_prison(struct ucred *); static int sem_prison_cansee(struct prison *, struct semid_kernel *); static int sem_prison_check(void *, void *); static int sem_prison_set(void *, void *); static int sem_prison_get(void *, void *); static int sem_prison_remove(void *, void *); static void sem_prison_cleanup(struct prison *); #ifndef _SYS_SYSPROTO_H_ struct __semctl_args; int __semctl(struct 
thread *td, struct __semctl_args *uap); struct semget_args; int semget(struct thread *td, struct semget_args *uap); struct semop_args; int semop(struct thread *td, struct semop_args *uap); #endif static struct sem_undo *semu_alloc(struct thread *td); static int semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid, int semseq, int semnum, int adjval); static void semundo_clear(int semid, int semnum); static struct mtx sem_mtx; /* semaphore global lock */ static struct mtx sem_undo_mtx; static int semtot = 0; static struct semid_kernel *sema; /* semaphore id pool */ static struct mtx *sema_mtx; /* semaphore id pool mutexes*/ static struct sem *sem; /* semaphore pool */ LIST_HEAD(, sem_undo) semu_list; /* list of active undo structures */ LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo structures */ static int *semu; /* undo structure pool */ static eventhandler_tag semexit_tag; static unsigned sem_prison_slot; /* prison OSD slot */ #define SEMUNDO_MTX sem_undo_mtx #define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX); #define SEMUNDO_UNLOCK() mtx_unlock(&SEMUNDO_MTX); #define SEMUNDO_LOCKASSERT(how) mtx_assert(&SEMUNDO_MTX, (how)); struct sem { u_short semval; /* semaphore value */ pid_t sempid; /* pid of last operation */ u_short semncnt; /* # awaiting semval > cval */ u_short semzcnt; /* # awaiting semval = 0 */ }; /* * Undo structure (one per process) */ struct sem_undo { LIST_ENTRY(sem_undo) un_next; /* ptr to next active undo structure */ struct proc *un_proc; /* owner of this structure */ short un_cnt; /* # of active entries */ struct undo { short un_adjval; /* adjust on exit values */ short un_num; /* semaphore # */ int un_id; /* semid */ unsigned short un_seq; } un_ent[1]; /* undo entries */ }; /* * Configuration parameters */ #ifndef SEMMNI #define SEMMNI 50 /* # of semaphore identifiers */ #endif #ifndef SEMMNS #define SEMMNS 340 /* # of semaphores in system */ #endif #ifndef SEMUME #define SEMUME 50 /* max # of undo entries per process 
*/ #endif #ifndef SEMMNU #define SEMMNU 150 /* # of undo structures in system */ #endif /* shouldn't need tuning */ #ifndef SEMMSL #define SEMMSL SEMMNS /* max # of semaphores per id */ #endif #ifndef SEMOPM #define SEMOPM 100 /* max # of operations per semop call */ #endif #define SEMVMX 32767 /* semaphore maximum value */ #define SEMAEM 16384 /* adjust on exit max value */ /* * Due to the way semaphore memory is allocated, we have to ensure that * SEMUSZ is properly aligned. */ #define SEM_ALIGN(bytes) roundup2(bytes, sizeof(long)) /* actual size of an undo structure */ #define SEMUSZ SEM_ALIGN(offsetof(struct sem_undo, un_ent[SEMUME])) /* * Macro to find a particular sem_undo vector */ #define SEMU(ix) \ ((struct sem_undo *)(((intptr_t)semu)+ix * seminfo.semusz)) /* * semaphore info struct */ struct seminfo seminfo = { SEMMNI, /* # of semaphore identifiers */ SEMMNS, /* # of semaphores in system */ SEMMNU, /* # of undo structures in system */ SEMMSL, /* max # of semaphores per id */ SEMOPM, /* max # of operations per semop call */ SEMUME, /* max # of undo entries per process */ SEMUSZ, /* size in bytes of undo structure */ SEMVMX, /* semaphore maximum value */ SEMAEM /* adjust on exit max value */ }; SYSCTL_INT(_kern_ipc, OID_AUTO, semmni, CTLFLAG_RDTUN, &seminfo.semmni, 0, "Number of semaphore identifiers"); SYSCTL_INT(_kern_ipc, OID_AUTO, semmns, CTLFLAG_RDTUN, &seminfo.semmns, 0, "Maximum number of semaphores in the system"); SYSCTL_INT(_kern_ipc, OID_AUTO, semmnu, CTLFLAG_RDTUN, &seminfo.semmnu, 0, "Maximum number of undo structures in the system"); SYSCTL_INT(_kern_ipc, OID_AUTO, semmsl, CTLFLAG_RWTUN, &seminfo.semmsl, 0, "Max semaphores per id"); SYSCTL_INT(_kern_ipc, OID_AUTO, semopm, CTLFLAG_RDTUN, &seminfo.semopm, 0, "Max operations per semop call"); SYSCTL_INT(_kern_ipc, OID_AUTO, semume, CTLFLAG_RDTUN, &seminfo.semume, 0, "Max undo entries per process"); SYSCTL_INT(_kern_ipc, OID_AUTO, semusz, CTLFLAG_RDTUN, &seminfo.semusz, 0, "Size in bytes of undo 
structure"); SYSCTL_INT(_kern_ipc, OID_AUTO, semvmx, CTLFLAG_RWTUN, &seminfo.semvmx, 0, "Semaphore maximum value"); SYSCTL_INT(_kern_ipc, OID_AUTO, semaem, CTLFLAG_RWTUN, &seminfo.semaem, 0, "Adjust on exit max value"); SYSCTL_PROC(_kern_ipc, OID_AUTO, sema, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_sema, "", "Semaphore id pool"); static struct syscall_helper_data sem_syscalls[] = { SYSCALL_INIT_HELPER(__semctl), SYSCALL_INIT_HELPER(semget), SYSCALL_INIT_HELPER(semop), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL_INIT_HELPER(semsys), SYSCALL_INIT_HELPER_COMPAT(freebsd7___semctl), #endif SYSCALL_INIT_LAST }; #ifdef COMPAT_FREEBSD32 #include #include #include #include #include #include static struct syscall_helper_data sem32_syscalls[] = { SYSCALL32_INIT_HELPER(freebsd32_semctl), SYSCALL32_INIT_HELPER_COMPAT(semget), SYSCALL32_INIT_HELPER_COMPAT(semop), SYSCALL32_INIT_HELPER(freebsd32_semsys), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL32_INIT_HELPER(freebsd7_freebsd32_semctl), #endif SYSCALL_INIT_LAST }; #endif static int seminit(void) { struct prison *pr; void **rsv; int i, error; osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_CHECK] = sem_prison_check, [PR_METHOD_SET] = sem_prison_set, [PR_METHOD_GET] = sem_prison_get, [PR_METHOD_REMOVE] = sem_prison_remove, }; sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK); sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM, M_WAITOK); sema_mtx = malloc(sizeof(struct mtx) * seminfo.semmni, M_SEM, M_WAITOK | M_ZERO); semu = malloc(seminfo.semmnu * seminfo.semusz, M_SEM, M_WAITOK); for (i = 0; i < seminfo.semmni; i++) { sema[i].u.sem_base = 0; sema[i].u.sem_perm.mode = 0; sema[i].u.sem_perm.seq = 0; #ifdef MAC mac_sysvsem_init(&sema[i]); #endif } for (i = 0; i < seminfo.semmni; i++) mtx_init(&sema_mtx[i], "semid", NULL, 
MTX_DEF); LIST_INIT(&semu_free_list); for (i = 0; i < seminfo.semmnu; i++) { struct sem_undo *suptr = SEMU(i); suptr->un_proc = NULL; LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); } LIST_INIT(&semu_list); mtx_init(&sem_mtx, "sem", NULL, MTX_DEF); mtx_init(&sem_undo_mtx, "semu", NULL, MTX_DEF); semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL, EVENTHANDLER_PRI_ANY); /* Set current prisons according to their allow.sysvipc. */ sem_prison_slot = osd_jail_register(NULL, methods); rsv = osd_reserve(sem_prison_slot); prison_lock(&prison0); (void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0); prison_unlock(&prison0); rsv = NULL; sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { if (rsv == NULL) rsv = osd_reserve(sem_prison_slot); prison_lock(pr); if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, &prison0); rsv = NULL; } prison_unlock(pr); } if (rsv != NULL) osd_free_reserved(rsv); sx_sunlock(&allprison_lock); error = syscall_helper_register(sem_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 error = syscall32_helper_register(sem32_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #endif return (0); } static int semunload(void) { int i; /* XXXKIB */ if (semtot != 0) return (EBUSY); #ifdef COMPAT_FREEBSD32 syscall32_helper_unregister(sem32_syscalls); #endif syscall_helper_unregister(sem_syscalls); EVENTHANDLER_DEREGISTER(process_exit, semexit_tag); if (sem_prison_slot != 0) osd_jail_deregister(sem_prison_slot); #ifdef MAC for (i = 0; i < seminfo.semmni; i++) mac_sysvsem_destroy(&sema[i]); #endif free(sem, M_SEM); free(sema, M_SEM); free(semu, M_SEM); for (i = 0; i < seminfo.semmni; i++) mtx_destroy(&sema_mtx[i]); free(sema_mtx, M_SEM); mtx_destroy(&sem_mtx); mtx_destroy(&sem_undo_mtx); return (0); } static int sysvsem_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { 
case MOD_LOAD: error = seminit(); if (error != 0) semunload(); break; case MOD_UNLOAD: error = semunload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } static moduledata_t sysvsem_mod = { "sysvsem", &sysvsem_modload, NULL }; DECLARE_MODULE(sysvsem, sysvsem_mod, SI_SUB_SYSV_SEM, SI_ORDER_FIRST); MODULE_VERSION(sysvsem, 1); /* * Allocate a new sem_undo structure for a process * (returns ptr to structure or NULL if no more room) */ static struct sem_undo * semu_alloc(struct thread *td) { struct sem_undo *suptr; SEMUNDO_LOCKASSERT(MA_OWNED); if ((suptr = LIST_FIRST(&semu_free_list)) == NULL) return (NULL); LIST_REMOVE(suptr, un_next); LIST_INSERT_HEAD(&semu_list, suptr, un_next); suptr->un_cnt = 0; suptr->un_proc = td->td_proc; return (suptr); } static int semu_try_free(struct sem_undo *suptr) { SEMUNDO_LOCKASSERT(MA_OWNED); if (suptr->un_cnt != 0) return (0); LIST_REMOVE(suptr, un_next); LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); return (1); } /* * Adjust a particular entry for a particular proc */ static int semundo_adjust(struct thread *td, struct sem_undo **supptr, int semid, int semseq, int semnum, int adjval) { struct proc *p = td->td_proc; struct sem_undo *suptr; struct undo *sunptr; int i; SEMUNDO_LOCKASSERT(MA_OWNED); /* Look for and remember the sem_undo if the caller doesn't provide it */ suptr = *supptr; if (suptr == NULL) { LIST_FOREACH(suptr, &semu_list, un_next) { if (suptr->un_proc == p) { *supptr = suptr; break; } } if (suptr == NULL) { if (adjval == 0) return(0); suptr = semu_alloc(td); if (suptr == NULL) return (ENOSPC); *supptr = suptr; } } /* * Look for the requested entry and adjust it (delete if adjval becomes * 0). 
*/ sunptr = &suptr->un_ent[0]; for (i = 0; i < suptr->un_cnt; i++, sunptr++) { if (sunptr->un_id != semid || sunptr->un_num != semnum) continue; if (adjval != 0) { adjval += sunptr->un_adjval; if (adjval > seminfo.semaem || adjval < -seminfo.semaem) return (ERANGE); } sunptr->un_adjval = adjval; if (sunptr->un_adjval == 0) { suptr->un_cnt--; if (i < suptr->un_cnt) suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; if (suptr->un_cnt == 0) semu_try_free(suptr); } return (0); } /* Didn't find the right entry - create it */ if (adjval == 0) return (0); if (adjval > seminfo.semaem || adjval < -seminfo.semaem) return (ERANGE); if (suptr->un_cnt != seminfo.semume) { sunptr = &suptr->un_ent[suptr->un_cnt]; suptr->un_cnt++; sunptr->un_adjval = adjval; sunptr->un_id = semid; sunptr->un_num = semnum; sunptr->un_seq = semseq; } else return (EINVAL); return (0); } static void semundo_clear(int semid, int semnum) { struct sem_undo *suptr, *suptr1; struct undo *sunptr; int i; SEMUNDO_LOCKASSERT(MA_OWNED); LIST_FOREACH_SAFE(suptr, &semu_list, un_next, suptr1) { sunptr = &suptr->un_ent[0]; for (i = 0; i < suptr->un_cnt; i++, sunptr++) { if (sunptr->un_id != semid) continue; if (semnum == -1 || sunptr->un_num == semnum) { suptr->un_cnt--; if (i < suptr->un_cnt) { suptr->un_ent[i] = suptr->un_ent[suptr->un_cnt]; continue; } semu_try_free(suptr); } if (semnum != -1) break; } } } static int semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr) { return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) || sem_prison_cansee(rpr, semakptr) ? EINVAL : 0); } static void sem_remove(int semidx, struct ucred *cred) { struct semid_kernel *semakptr; int i; KASSERT(semidx >= 0 && semidx < seminfo.semmni, ("semidx out of bounds")); semakptr = &sema[semidx]; semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0; semakptr->u.sem_perm.uid = cred ? 
cred->cr_uid : 0; semakptr->u.sem_perm.mode = 0; racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems); crfree(semakptr->cred); semakptr->cred = NULL; SEMUNDO_LOCK(); semundo_clear(semidx, -1); SEMUNDO_UNLOCK(); #ifdef MAC mac_sysvsem_cleanup(semakptr); #endif wakeup(semakptr); for (i = 0; i < seminfo.semmni; i++) { if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && sema[i].u.sem_base > semakptr->u.sem_base) mtx_lock_flags(&sema_mtx[i], LOP_DUPOK); } for (i = semakptr->u.sem_base - sem; i < semtot; i++) sem[i] = sem[i + semakptr->u.sem_nsems]; for (i = 0; i < seminfo.semmni; i++) { if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && sema[i].u.sem_base > semakptr->u.sem_base) { sema[i].u.sem_base -= semakptr->u.sem_nsems; mtx_unlock(&sema_mtx[i]); } } semtot -= semakptr->u.sem_nsems; } static struct prison * sem_find_prison(struct ucred *cred) { struct prison *pr, *rpr; pr = cred->cr_prison; prison_lock(pr); rpr = osd_jail_get(pr, sem_prison_slot); prison_unlock(pr); return rpr; } static int sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr) { if (semakptr->cred == NULL || !(rpr == semakptr->cred->cr_prison || prison_ischild(rpr, semakptr->cred->cr_prison))) return (EINVAL); return (0); } /* * Note that the user-mode half of this passes a union, not a pointer. 
*/ #ifndef _SYS_SYSPROTO_H_ struct __semctl_args { int semid; int semnum; int cmd; union semun *arg; }; #endif int sys___semctl(struct thread *td, struct __semctl_args *uap) { struct semid_ds dsbuf; union semun arg, semun; register_t rval; int error; switch (uap->cmd) { case SEM_STAT: case IPC_SET: case IPC_STAT: case GETALL: case SETVAL: case SETALL: error = copyin(uap->arg, &arg, sizeof(arg)); if (error) return (error); break; } switch (uap->cmd) { case SEM_STAT: case IPC_STAT: semun.buf = &dsbuf; break; case IPC_SET: error = copyin(arg.buf, &dsbuf, sizeof(dsbuf)); if (error) return (error); semun.buf = &dsbuf; break; case GETALL: case SETALL: semun.array = arg.array; break; case SETVAL: semun.val = arg.val; break; } error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, &rval); if (error) return (error); switch (uap->cmd) { case SEM_STAT: case IPC_STAT: error = copyout(&dsbuf, arg.buf, sizeof(dsbuf)); break; } if (error == 0) td->td_retval[0] = rval; return (error); } int kern_semctl(struct thread *td, int semid, int semnum, int cmd, union semun *arg, register_t *rval) { u_short *array; struct ucred *cred = td->td_ucred; int i, error; struct prison *rpr; struct semid_ds *sbuf; struct semid_kernel *semakptr; struct mtx *sema_mtxp; u_short usval, count; int semidx; DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n", semid, semnum, cmd, arg)); rpr = sem_find_prison(td->td_ucred); if (sem == NULL) return (ENOSYS); array = NULL; switch(cmd) { case SEM_STAT: /* * For this command we assume semid is an array index * rather than an IPC id. 
*/ if (semid < 0 || semid >= seminfo.semmni) return (EINVAL); semakptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; mtx_lock(sema_mtxp); if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) { error = EINVAL; goto done2; } if ((error = sem_prison_cansee(rpr, semakptr))) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; #ifdef MAC error = mac_sysvsem_check_semctl(cred, semakptr, cmd); if (error != 0) goto done2; #endif bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); if (cred->cr_prison != semakptr->cred->cr_prison) arg->buf->sem_perm.key = IPC_PRIVATE; *rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm); mtx_unlock(sema_mtxp); return (0); } semidx = IPCID_TO_IX(semid); if (semidx < 0 || semidx >= seminfo.semmni) return (EINVAL); semakptr = &sema[semidx]; sema_mtxp = &sema_mtx[semidx]; if (cmd == IPC_RMID) mtx_lock(&sem_mtx); mtx_lock(sema_mtxp); #ifdef MAC error = mac_sysvsem_check_semctl(cred, semakptr, cmd); if (error != 0) goto done2; #endif error = 0; *rval = 0; switch (cmd) { case IPC_RMID: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) goto done2; sem_remove(semidx, cred); break; case IPC_SET: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M))) goto done2; sbuf = arg->buf; semakptr->u.sem_perm.uid = sbuf->sem_perm.uid; semakptr->u.sem_perm.gid = sbuf->sem_perm.gid; semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode & ~0777) | (sbuf->sem_perm.mode & 0777); semakptr->u.sem_ctime = time_second; break; case IPC_STAT: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds)); if (cred->cr_prison != semakptr->cred->cr_prison) arg->buf->sem_perm.key = IPC_PRIVATE; break; case GETNCNT: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = 
ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { error = EINVAL; goto done2; } *rval = semakptr->u.sem_base[semnum].semncnt; break; case GETPID: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { error = EINVAL; goto done2; } *rval = semakptr->u.sem_base[semnum].sempid; break; case GETVAL: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { error = EINVAL; goto done2; } *rval = semakptr->u.sem_base[semnum].semval; break; case GETALL: /* * Unfortunately, callers of this function don't know * in advance how many semaphores are in this set. * While we could just allocate the maximum size array * and pass the actual size back to the caller, that * won't work for SETALL since we can't copyin() more * data than the user specified as we may return a * spurious EFAULT. * * Note that the number of semaphores in a set is * fixed for the life of that set. The only way that * the 'count' could change while are blocked in * malloc() is if this semaphore set were destroyed * and a new one created with the same index. * However, semvalid() will catch that due to the * sequence number unless exactly 0x8000 (or a * multiple thereof) semaphore sets for the same index * are created and destroyed while we are in malloc! 
* */ count = semakptr->u.sem_nsems; mtx_unlock(sema_mtxp); array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); mtx_lock(sema_mtxp); if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; for (i = 0; i < semakptr->u.sem_nsems; i++) array[i] = semakptr->u.sem_base[i].semval; mtx_unlock(sema_mtxp); error = copyout(array, arg->array, count * sizeof(*array)); mtx_lock(sema_mtxp); break; case GETZCNT: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R))) goto done2; if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { error = EINVAL; goto done2; } *rval = semakptr->u.sem_base[semnum].semzcnt; break; case SETVAL: if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) goto done2; if (semnum < 0 || semnum >= semakptr->u.sem_nsems) { error = EINVAL; goto done2; } if (arg->val < 0 || arg->val > seminfo.semvmx) { error = ERANGE; goto done2; } semakptr->u.sem_base[semnum].semval = arg->val; SEMUNDO_LOCK(); semundo_clear(semidx, semnum); SEMUNDO_UNLOCK(); wakeup(semakptr); break; case SETALL: /* * See comment on GETALL for why 'count' shouldn't change * and why we require a userland buffer. 
*/ count = semakptr->u.sem_nsems; mtx_unlock(sema_mtxp); array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK); error = copyin(arg->array, array, count * sizeof(*array)); mtx_lock(sema_mtxp); if (error) break; if ((error = semvalid(semid, rpr, semakptr)) != 0) goto done2; KASSERT(count == semakptr->u.sem_nsems, ("nsems changed")); if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W))) goto done2; for (i = 0; i < semakptr->u.sem_nsems; i++) { usval = array[i]; if (usval > seminfo.semvmx) { error = ERANGE; break; } semakptr->u.sem_base[i].semval = usval; } SEMUNDO_LOCK(); semundo_clear(semidx, -1); SEMUNDO_UNLOCK(); wakeup(semakptr); break; default: error = EINVAL; break; } done2: mtx_unlock(sema_mtxp); if (cmd == IPC_RMID) mtx_unlock(&sem_mtx); if (array != NULL) free(array, M_TEMP); return(error); } #ifndef _SYS_SYSPROTO_H_ struct semget_args { key_t key; int nsems; int semflg; }; #endif int sys_semget(struct thread *td, struct semget_args *uap) { int semid, error = 0; int key = uap->key; int nsems = uap->nsems; int semflg = uap->semflg; struct ucred *cred = td->td_ucred; DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); if (sem_find_prison(cred) == NULL) return (ENOSYS); mtx_lock(&sem_mtx); if (key != IPC_PRIVATE) { for (semid = 0; semid < seminfo.semmni; semid++) { if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) && sema[semid].cred != NULL && sema[semid].cred->cr_prison == cred->cr_prison && sema[semid].u.sem_perm.key == key) break; } if (semid < seminfo.semmni) { DPRINTF(("found public key\n")); if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { DPRINTF(("not exclusive\n")); error = EEXIST; goto done2; } if ((error = ipcperm(td, &sema[semid].u.sem_perm, semflg & 0700))) { goto done2; } if (nsems > 0 && sema[semid].u.sem_nsems < nsems) { DPRINTF(("too small\n")); error = EINVAL; goto done2; } #ifdef MAC error = mac_sysvsem_check_semget(cred, &sema[semid]); if (error != 0) goto done2; #endif goto found; } } DPRINTF(("need to allocate the 
semid_kernel\n")); if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { if (nsems <= 0 || nsems > seminfo.semmsl) { DPRINTF(("nsems out of range (0<%d<=%d)\n", nsems, seminfo.semmsl)); error = EINVAL; goto done2; } if (nsems > seminfo.semmns - semtot) { DPRINTF(( "not enough semaphores left (need %d, got %d)\n", nsems, seminfo.semmns - semtot)); error = ENOSPC; goto done2; } for (semid = 0; semid < seminfo.semmni; semid++) { if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0) break; } if (semid == seminfo.semmni) { DPRINTF(("no more semid_kernel's available\n")); error = ENOSPC; goto done2; } #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); error = racct_add(td->td_proc, RACCT_NSEM, nsems); PROC_UNLOCK(td->td_proc); if (error != 0) { error = ENOSPC; goto done2; } } #endif DPRINTF(("semid %d is available\n", semid)); mtx_lock(&sema_mtx[semid]); KASSERT((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0, ("Lost semaphore %d", semid)); sema[semid].u.sem_perm.key = key; sema[semid].u.sem_perm.cuid = cred->cr_uid; sema[semid].u.sem_perm.uid = cred->cr_uid; sema[semid].u.sem_perm.cgid = cred->cr_gid; sema[semid].u.sem_perm.gid = cred->cr_gid; sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; sema[semid].cred = crhold(cred); sema[semid].u.sem_perm.seq = (sema[semid].u.sem_perm.seq + 1) & 0x7fff; sema[semid].u.sem_nsems = nsems; sema[semid].u.sem_otime = 0; sema[semid].u.sem_ctime = time_second; sema[semid].u.sem_base = &sem[semtot]; semtot += nsems; bzero(sema[semid].u.sem_base, sizeof(sema[semid].u.sem_base[0])*nsems); #ifdef MAC mac_sysvsem_create(cred, &sema[semid]); #endif mtx_unlock(&sema_mtx[semid]); DPRINTF(("sembase = %p, next = %p\n", sema[semid].u.sem_base, &sem[semtot])); } else { DPRINTF(("didn't find it and wasn't asked to create it\n")); error = ENOENT; goto done2; } found: td->td_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm); done2: mtx_unlock(&sem_mtx); return (error); } #ifndef _SYS_SYSPROTO_H_ struct semop_args { int semid; struct 
sembuf *sops; size_t nsops; }; #endif int sys_semop(struct thread *td, struct semop_args *uap) { #define SMALL_SOPS 8 struct sembuf small_sops[SMALL_SOPS]; int semid = uap->semid; size_t nsops = uap->nsops; struct prison *rpr; struct sembuf *sops; struct semid_kernel *semakptr; struct sembuf *sopptr = NULL; struct sem *semptr = NULL; struct sem_undo *suptr; struct mtx *sema_mtxp; size_t i, j, k; int error; int do_wakeup, do_undos; unsigned short seq; #ifdef SEM_DEBUG sops = NULL; #endif DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops)); rpr = sem_find_prison(td->td_ucred); if (sem == NULL) return (ENOSYS); semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ if (semid < 0 || semid >= seminfo.semmni) return (EINVAL); /* Allocate memory for sem_ops */ if (nsops <= SMALL_SOPS) sops = small_sops; else if (nsops > seminfo.semopm) { DPRINTF(("too many sops (max=%d, nsops=%d)\n", seminfo.semopm, nsops)); return (E2BIG); } else { #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); if (nsops > racct_get_available(td->td_proc, RACCT_NSEMOP)) { PROC_UNLOCK(td->td_proc); return (E2BIG); } PROC_UNLOCK(td->td_proc); } #endif sops = malloc(nsops * sizeof(*sops), M_TEMP, M_WAITOK); } if ((error = copyin(uap->sops, sops, nsops * sizeof(sops[0]))) != 0) { DPRINTF(("error = %d from copyin(%p, %p, %d)\n", error, uap->sops, sops, nsops * sizeof(sops[0]))); if (sops != small_sops) free(sops, M_SEM); return (error); } semakptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; mtx_lock(sema_mtxp); if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) { error = EINVAL; goto done2; } seq = semakptr->u.sem_perm.seq; if (seq != IPCID_TO_SEQ(uap->semid)) { error = EINVAL; goto done2; } if ((error = sem_prison_cansee(rpr, semakptr)) != 0) goto done2; /* * Initial pass through sops to see what permissions are needed. * Also perform any checks that don't need repeating on each * attempt to satisfy the request vector. 
*/ j = 0; /* permission needed */ do_undos = 0; for (i = 0; i < nsops; i++) { sopptr = &sops[i]; if (sopptr->sem_num >= semakptr->u.sem_nsems) { error = EFBIG; goto done2; } if (sopptr->sem_flg & SEM_UNDO && sopptr->sem_op != 0) do_undos = 1; j |= (sopptr->sem_op == 0) ? SEM_R : SEM_A; } if ((error = ipcperm(td, &semakptr->u.sem_perm, j))) { DPRINTF(("error = %d from ipaccess\n", error)); goto done2; } #ifdef MAC error = mac_sysvsem_check_semop(td->td_ucred, semakptr, j); if (error != 0) goto done2; #endif /* * Loop trying to satisfy the vector of requests. * If we reach a point where we must wait, any requests already * performed are rolled back and we go to sleep until some other * process wakes us up. At this point, we start all over again. * * This ensures that from the perspective of other tasks, a set * of requests is atomic (never partially satisfied). */ for (;;) { do_wakeup = 0; error = 0; /* error return if necessary */ for (i = 0; i < nsops; i++) { sopptr = &sops[i]; semptr = &semakptr->u.sem_base[sopptr->sem_num]; DPRINTF(( "semop: semakptr=%p, sem_base=%p, " "semptr=%p, sem[%d]=%d : op=%d, flag=%s\n", semakptr, semakptr->u.sem_base, semptr, sopptr->sem_num, semptr->semval, sopptr->sem_op, (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait")); if (sopptr->sem_op < 0) { if (semptr->semval + sopptr->sem_op < 0) { DPRINTF(("semop: can't do it now\n")); break; } else { semptr->semval += sopptr->sem_op; if (semptr->semval == 0 && semptr->semzcnt > 0) do_wakeup = 1; } } else if (sopptr->sem_op == 0) { if (semptr->semval != 0) { DPRINTF(("semop: not zero now\n")); break; } } else if (semptr->semval + sopptr->sem_op > seminfo.semvmx) { error = ERANGE; break; } else { if (semptr->semncnt > 0) do_wakeup = 1; semptr->semval += sopptr->sem_op; } } /* * Did we get through the entire vector? */ if (i >= nsops) goto done; /* * No ... 
rollback anything that we've already done */ DPRINTF(("semop: rollback 0 through %d\n", i-1)); for (j = 0; j < i; j++) semakptr->u.sem_base[sops[j].sem_num].semval -= sops[j].sem_op; /* If we detected an error, return it */ if (error != 0) goto done2; /* * If the request that we couldn't satisfy has the * NOWAIT flag set then return with EAGAIN. */ if (sopptr->sem_flg & IPC_NOWAIT) { error = EAGAIN; goto done2; } if (sopptr->sem_op == 0) semptr->semzcnt++; else semptr->semncnt++; DPRINTF(("semop: good night!\n")); error = msleep(semakptr, sema_mtxp, (PZERO - 4) | PCATCH, "semwait", 0); DPRINTF(("semop: good morning (error=%d)!\n", error)); /* return code is checked below, after sem[nz]cnt-- */ /* * Make sure that the semaphore still exists */ seq = semakptr->u.sem_perm.seq; if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || seq != IPCID_TO_SEQ(uap->semid)) { error = EIDRM; goto done2; } /* * Renew the semaphore's pointer after wakeup since * during msleep sem_base may have been modified and semptr * is not valid any more */ semptr = &semakptr->u.sem_base[sopptr->sem_num]; /* * The semaphore is still alive. Readjust the count of * waiting processes. */ if (sopptr->sem_op == 0) semptr->semzcnt--; else semptr->semncnt--; /* * Is it really morning, or was our sleep interrupted? * (Delayed check of msleep() return code because we * need to decrement sem[nz]cnt either way.) */ if (error != 0) { error = EINTR; goto done2; } DPRINTF(("semop: good morning!\n")); } done: /* * Process any SEM_UNDO requests. */ if (do_undos) { SEMUNDO_LOCK(); suptr = NULL; for (i = 0; i < nsops; i++) { /* * We only need to deal with SEM_UNDO's for non-zero * op's. */ int adjval; if ((sops[i].sem_flg & SEM_UNDO) == 0) continue; adjval = sops[i].sem_op; if (adjval == 0) continue; error = semundo_adjust(td, &suptr, semid, seq, sops[i].sem_num, -adjval); if (error == 0) continue; /* * Oh-Oh! We ran out of either sem_undo's or undo's. 
* Rollback the adjustments to this point and then * rollback the semaphore ups and down so we can return * with an error with all structures restored. We * rollback the undo's in the exact reverse order that * we applied them. This guarantees that we won't run * out of space as we roll things back out. */ for (j = 0; j < i; j++) { k = i - j - 1; if ((sops[k].sem_flg & SEM_UNDO) == 0) continue; adjval = sops[k].sem_op; if (adjval == 0) continue; if (semundo_adjust(td, &suptr, semid, seq, sops[k].sem_num, adjval) != 0) panic("semop - can't undo undos"); } for (j = 0; j < nsops; j++) semakptr->u.sem_base[sops[j].sem_num].semval -= sops[j].sem_op; DPRINTF(("error = %d from semundo_adjust\n", error)); SEMUNDO_UNLOCK(); goto done2; } /* loop through the sops */ SEMUNDO_UNLOCK(); } /* if (do_undos) */ /* We're definitely done - set the sempid's and time */ for (i = 0; i < nsops; i++) { sopptr = &sops[i]; semptr = &semakptr->u.sem_base[sopptr->sem_num]; semptr->sempid = td->td_proc->p_pid; } semakptr->u.sem_otime = time_second; /* * Do a wakeup if any semaphore was up'd whilst something was * sleeping on it. */ if (do_wakeup) { DPRINTF(("semop: doing wakeup\n")); wakeup(semakptr); DPRINTF(("semop: back from wakeup\n")); } DPRINTF(("semop: done\n")); td->td_retval[0] = 0; done2: mtx_unlock(sema_mtxp); if (sops != small_sops) free(sops, M_SEM); return (error); } /* * Go through the undo structures for this process and apply the adjustments to * semaphores. */ static void semexit_myhook(void *arg, struct proc *p) { struct sem_undo *suptr; struct semid_kernel *semakptr; struct mtx *sema_mtxp; int semid, semnum, adjval, ix; unsigned short seq; /* * Go through the chain of undo vectors looking for one * associated with this process. 
*/ SEMUNDO_LOCK(); LIST_FOREACH(suptr, &semu_list, un_next) { if (suptr->un_proc == p) break; } if (suptr == NULL) { SEMUNDO_UNLOCK(); return; } LIST_REMOVE(suptr, un_next); DPRINTF(("proc @%p has undo structure with %d entries\n", p, suptr->un_cnt)); /* * If there are any active undo elements then process them. */ if (suptr->un_cnt > 0) { SEMUNDO_UNLOCK(); for (ix = 0; ix < suptr->un_cnt; ix++) { semid = suptr->un_ent[ix].un_id; semnum = suptr->un_ent[ix].un_num; adjval = suptr->un_ent[ix].un_adjval; seq = suptr->un_ent[ix].un_seq; semakptr = &sema[semid]; sema_mtxp = &sema_mtx[semid]; mtx_lock(sema_mtxp); if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 || (semakptr->u.sem_perm.seq != seq)) { mtx_unlock(sema_mtxp); continue; } if (semnum >= semakptr->u.sem_nsems) panic("semexit - semnum out of range"); DPRINTF(( "semexit: %p id=%d num=%d(adj=%d) ; sem=%d\n", suptr->un_proc, suptr->un_ent[ix].un_id, suptr->un_ent[ix].un_num, suptr->un_ent[ix].un_adjval, semakptr->u.sem_base[semnum].semval)); if (adjval < 0 && semakptr->u.sem_base[semnum].semval < -adjval) semakptr->u.sem_base[semnum].semval = 0; else semakptr->u.sem_base[semnum].semval += adjval; wakeup(semakptr); DPRINTF(("semexit: back from wakeup\n")); mtx_unlock(sema_mtxp); } SEMUNDO_LOCK(); } /* * Deallocate the undo vector. 
*/ DPRINTF(("removing vector\n")); suptr->un_proc = NULL; suptr->un_cnt = 0; LIST_INSERT_HEAD(&semu_free_list, suptr, un_next); SEMUNDO_UNLOCK(); } static int sysctl_sema(SYSCTL_HANDLER_ARGS) { struct prison *pr, *rpr; struct semid_kernel tsemak; int error, i; pr = req->td->td_ucred->cr_prison; rpr = sem_find_prison(req->td->td_ucred); error = 0; for (i = 0; i < seminfo.semmni; i++) { mtx_lock(&sema_mtx[i]); if ((sema[i].u.sem_perm.mode & SEM_ALLOC) == 0 || rpr == NULL || sem_prison_cansee(rpr, &sema[i]) != 0) bzero(&tsemak, sizeof(tsemak)); else { tsemak = sema[i]; if (tsemak.cred->cr_prison != pr) tsemak.u.sem_perm.key = IPC_PRIVATE; } mtx_unlock(&sema_mtx[i]); error = SYSCTL_OUT(req, &tsemak, sizeof(tsemak)); if (error != 0) break; } return (error); } static int sem_prison_check(void *obj, void *data) { struct prison *pr = obj; struct prison *prpr; struct vfsoptlist *opts = data; int error, jsys; /* * sysvsem is a jailsys integer. * It must be "disable" if the parent jail is disabled. */ error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)); if (error != ENOENT) { if (error != 0) return (error); switch (jsys) { case JAIL_SYS_DISABLE: break; case JAIL_SYS_NEW: case JAIL_SYS_INHERIT: prison_lock(pr->pr_parent); prpr = osd_jail_get(pr->pr_parent, sem_prison_slot); prison_unlock(pr->pr_parent); if (prpr == NULL) return (EPERM); break; default: return (EINVAL); } } return (0); } static int sem_prison_set(void *obj, void *data) { struct prison *pr = obj; struct prison *tpr, *orpr, *nrpr, *trpr; struct vfsoptlist *opts = data; void *rsv; int jsys, descend; /* * sysvsem controls which jail is the root of the associated sems (this * jail or same as the parent), or if the feature is available at all. */ if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT) jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) ? JAIL_SYS_INHERIT : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) ? 
JAIL_SYS_DISABLE : -1; if (jsys == JAIL_SYS_DISABLE) { prison_lock(pr); orpr = osd_jail_get(pr, sem_prison_slot); if (orpr != NULL) osd_jail_del(pr, sem_prison_slot); prison_unlock(pr); if (orpr != NULL) { if (orpr == pr) sem_prison_cleanup(pr); /* Disable all child jails as well. */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, sem_prison_slot); if (trpr != NULL) { osd_jail_del(tpr, sem_prison_slot); prison_unlock(tpr); if (trpr == tpr) sem_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } else if (jsys != -1) { if (jsys == JAIL_SYS_NEW) nrpr = pr; else { prison_lock(pr->pr_parent); nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot); prison_unlock(pr->pr_parent); } rsv = osd_reserve(sem_prison_slot); prison_lock(pr); orpr = osd_jail_get(pr, sem_prison_slot); if (orpr != nrpr) (void)osd_jail_set_reserved(pr, sem_prison_slot, rsv, nrpr); else osd_free_reserved(rsv); prison_unlock(pr); if (orpr != nrpr) { if (orpr == pr) sem_prison_cleanup(pr); if (orpr != NULL) { /* Change child jails matching the old root, */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, sem_prison_slot); if (trpr == orpr) { (void)osd_jail_set(tpr, sem_prison_slot, nrpr); prison_unlock(tpr); if (trpr == tpr) sem_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } } return (0); } static int sem_prison_get(void *obj, void *data) { struct prison *pr = obj; struct prison *rpr; struct vfsoptlist *opts = data; int error, jsys; /* Set sysvsem based on the jail's root prison. */ prison_lock(pr); rpr = osd_jail_get(pr, sem_prison_slot); prison_unlock(pr); jsys = rpr == NULL ? JAIL_SYS_DISABLE : rpr == pr ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys)); if (error == ENOENT) error = 0; return (error); } static int sem_prison_remove(void *obj, void *data __unused) { struct prison *pr = obj; struct prison *rpr; prison_lock(pr); rpr = osd_jail_get(pr, sem_prison_slot); prison_unlock(pr); if (rpr == pr) sem_prison_cleanup(pr); return (0); } static void sem_prison_cleanup(struct prison *pr) { int i; /* Remove any sems that belong to this jail. */ mtx_lock(&sem_mtx); for (i = 0; i < seminfo.semmni; i++) { if ((sema[i].u.sem_perm.mode & SEM_ALLOC) && sema[i].cred != NULL && sema[i].cred->cr_prison == pr) { mtx_lock(&sema_mtx[i]); sem_remove(i, NULL); mtx_unlock(&sema_mtx[i]); } } mtx_unlock(&sem_mtx); } SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores"); #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *semcalls[] = { (sy_call_t *)freebsd7___semctl, (sy_call_t *)sys_semget, (sy_call_t *)sys_semop }; /* * Entry point for all SEM calls. */ int sys_semsys(td, uap) struct thread *td; /* XXX actually varargs. 
*/ struct semsys_args /* { int which; int a2; int a3; int a4; int a5; } */ *uap; { int error; + AUDIT_ARG_SVIPC_WHICH(uap->which); if (uap->which < 0 || uap->which >= nitems(semcalls)) return (EINVAL); error = (*semcalls[uap->which])(td, &uap->a2); return (error); } #ifndef CP #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) #endif #ifndef _SYS_SYSPROTO_H_ struct freebsd7___semctl_args { int semid; int semnum; int cmd; union semun_old *arg; }; #endif int freebsd7___semctl(struct thread *td, struct freebsd7___semctl_args *uap) { struct semid_ds_old dsold; struct semid_ds dsbuf; union semun_old arg; union semun semun; register_t rval; int error; switch (uap->cmd) { case SEM_STAT: case IPC_SET: case IPC_STAT: case GETALL: case SETVAL: case SETALL: error = copyin(uap->arg, &arg, sizeof(arg)); if (error) return (error); break; } switch (uap->cmd) { case SEM_STAT: case IPC_STAT: semun.buf = &dsbuf; break; case IPC_SET: error = copyin(arg.buf, &dsold, sizeof(dsold)); if (error) return (error); ipcperm_old2new(&dsold.sem_perm, &dsbuf.sem_perm); CP(dsold, dsbuf, sem_base); CP(dsold, dsbuf, sem_nsems); CP(dsold, dsbuf, sem_otime); CP(dsold, dsbuf, sem_ctime); semun.buf = &dsbuf; break; case GETALL: case SETALL: semun.array = arg.array; break; case SETVAL: semun.val = arg.val; break; } error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, &rval); if (error) return (error); switch (uap->cmd) { case SEM_STAT: case IPC_STAT: bzero(&dsold, sizeof(dsold)); ipcperm_new2old(&dsbuf.sem_perm, &dsold.sem_perm); CP(dsbuf, dsold, sem_base); CP(dsbuf, dsold, sem_nsems); CP(dsbuf, dsold, sem_otime); CP(dsbuf, dsold, sem_ctime); error = copyout(&dsold, arg.buf, sizeof(dsold)); break; } if (error == 0) td->td_retval[0] = rval; return (error); } #endif /* COMPAT_FREEBSD{4,5,6,7} */ #ifdef COMPAT_FREEBSD32 int freebsd32_semsys(struct thread *td, struct freebsd32_semsys_args *uap) { #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) + AUDIT_ARG_SVIPC_WHICH(uap->which); switch (uap->which) { case 0: return (freebsd7_freebsd32_semctl(td, (struct freebsd7_freebsd32_semctl_args *)&uap->a2)); default: return (sys_semsys(td, (struct semsys_args *)uap)); } #else return (nosys(td, NULL)); #endif } #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) int freebsd7_freebsd32_semctl(struct thread *td, struct freebsd7_freebsd32_semctl_args *uap) { struct semid_ds32_old dsbuf32; struct semid_ds dsbuf; union semun semun; union semun32 arg; register_t rval; int error; switch (uap->cmd) { case SEM_STAT: case IPC_SET: case IPC_STAT: case GETALL: case SETVAL: case SETALL: error = copyin(uap->arg, &arg, sizeof(arg)); if (error) return (error); break; } switch (uap->cmd) { case SEM_STAT: case IPC_STAT: semun.buf = &dsbuf; break; case IPC_SET: error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32)); if (error) return (error); freebsd32_ipcperm_old_in(&dsbuf32.sem_perm, &dsbuf.sem_perm); PTRIN_CP(dsbuf32, dsbuf, sem_base); CP(dsbuf32, dsbuf, sem_nsems); CP(dsbuf32, dsbuf, sem_otime); CP(dsbuf32, dsbuf, sem_ctime); semun.buf = &dsbuf; break; case GETALL: case SETALL: semun.array = PTRIN(arg.array); break; case SETVAL: semun.val = arg.val; break; } error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, &rval); if (error) return (error); switch (uap->cmd) { case SEM_STAT: case IPC_STAT: bzero(&dsbuf32, sizeof(dsbuf32)); freebsd32_ipcperm_old_out(&dsbuf.sem_perm, &dsbuf32.sem_perm); PTROUT_CP(dsbuf, dsbuf32, sem_base); CP(dsbuf, dsbuf32, sem_nsems); CP(dsbuf, dsbuf32, sem_otime); CP(dsbuf, dsbuf32, sem_ctime); error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32)); break; } if (error == 0) td->td_retval[0] = rval; return (error); } #endif int freebsd32_semctl(struct thread *td, struct freebsd32_semctl_args *uap) { struct semid_ds32 dsbuf32; struct semid_ds dsbuf; union semun semun; union 
semun32 arg; register_t rval; int error; switch (uap->cmd) { case SEM_STAT: case IPC_SET: case IPC_STAT: case GETALL: case SETVAL: case SETALL: error = copyin(uap->arg, &arg, sizeof(arg)); if (error) return (error); break; } switch (uap->cmd) { case SEM_STAT: case IPC_STAT: semun.buf = &dsbuf; break; case IPC_SET: error = copyin(PTRIN(arg.buf), &dsbuf32, sizeof(dsbuf32)); if (error) return (error); freebsd32_ipcperm_in(&dsbuf32.sem_perm, &dsbuf.sem_perm); PTRIN_CP(dsbuf32, dsbuf, sem_base); CP(dsbuf32, dsbuf, sem_nsems); CP(dsbuf32, dsbuf, sem_otime); CP(dsbuf32, dsbuf, sem_ctime); semun.buf = &dsbuf; break; case GETALL: case SETALL: semun.array = PTRIN(arg.array); break; case SETVAL: semun.val = arg.val; break; } error = kern_semctl(td, uap->semid, uap->semnum, uap->cmd, &semun, &rval); if (error) return (error); switch (uap->cmd) { case SEM_STAT: case IPC_STAT: bzero(&dsbuf32, sizeof(dsbuf32)); freebsd32_ipcperm_out(&dsbuf.sem_perm, &dsbuf32.sem_perm); PTROUT_CP(dsbuf, dsbuf32, sem_base); CP(dsbuf, dsbuf32, sem_nsems); CP(dsbuf, dsbuf32, sem_otime); CP(dsbuf, dsbuf32, sem_ctime); error = copyout(&dsbuf32, PTRIN(arg.buf), sizeof(dsbuf32)); break; } if (error == 0) td->td_retval[0] = rval; return (error); } #endif /* COMPAT_FREEBSD32 */ Index: head/sys/kern/sysv_shm.c =================================================================== --- head/sys/kern/sysv_shm.c (revision 316184) +++ head/sys/kern/sysv_shm.c (revision 316185) @@ -1,1642 +1,1645 @@ /* $NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $ */ /*- * Copyright (c) 1994 Adam Glass and Charles Hannum. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Adam Glass and Charles * Hannum. * 4. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2003-2005 McAfee, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project in part by McAfee * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research * program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_sysvipc.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include FEATURE(sysv_shm, "System V shared memory segments support"); static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments"); static int shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode); static int shmget_existing(struct thread *td, struct shmget_args *uap, int mode, int segnum); #define SHMSEG_FREE 0x0200 #define SHMSEG_REMOVED 0x0400 #define SHMSEG_ALLOCATED 0x0800 static int shm_last_free, shm_nused, shmalloced; vm_size_t shm_committed; static struct shmid_kernel *shmsegs; static unsigned shm_prison_slot; struct shmmap_state { vm_offset_t va; int shmid; }; static void shm_deallocate_segment(struct shmid_kernel *); static 
int shm_find_segment_by_key(struct prison *, key_t); static struct shmid_kernel *shm_find_segment(struct prison *, int, bool); static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *); static void shmrealloc(void); static int shminit(void); static int sysvshm_modload(struct module *, int, void *); static int shmunload(void); static void shmexit_myhook(struct vmspace *vm); static void shmfork_myhook(struct proc *p1, struct proc *p2); static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS); static void shm_remove(struct shmid_kernel *, int); static struct prison *shm_find_prison(struct ucred *); static int shm_prison_cansee(struct prison *, struct shmid_kernel *); static int shm_prison_check(void *, void *); static int shm_prison_set(void *, void *); static int shm_prison_get(void *, void *); static int shm_prison_remove(void *, void *); static void shm_prison_cleanup(struct prison *); /* * Tuneable values. */ #ifndef SHMMAXPGS #define SHMMAXPGS 131072 /* Note: sysv shared memory is swap backed. 
*/ #endif #ifndef SHMMAX #define SHMMAX (SHMMAXPGS*PAGE_SIZE) #endif #ifndef SHMMIN #define SHMMIN 1 #endif #ifndef SHMMNI #define SHMMNI 192 #endif #ifndef SHMSEG #define SHMSEG 128 #endif #ifndef SHMALL #define SHMALL (SHMMAXPGS) #endif struct shminfo shminfo = { .shmmax = SHMMAX, .shmmin = SHMMIN, .shmmni = SHMMNI, .shmseg = SHMSEG, .shmall = SHMALL }; static int shm_use_phys; static int shm_allow_removed = 1; SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RWTUN, &shminfo.shmmax, 0, "Maximum shared memory segment size"); SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RWTUN, &shminfo.shmmin, 0, "Minimum shared memory segment size"); SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0, "Number of shared memory identifiers"); SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0, "Number of segments per process"); SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RWTUN, &shminfo.shmall, 0, "Maximum number of pages available for shared memory"); SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RWTUN, &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core"); SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RWTUN, &shm_allow_removed, 0, "Enable/Disable attachment to attached segments marked for removal"); SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_shmsegs, "", "Current number of shared memory segments allocated"); static struct sx sysvshmsx; #define SYSVSHM_LOCK() sx_xlock(&sysvshmsx) #define SYSVSHM_UNLOCK() sx_xunlock(&sysvshmsx) #define SYSVSHM_ASSERT_LOCKED() sx_assert(&sysvshmsx, SA_XLOCKED) static int shm_find_segment_by_key(struct prison *pr, key_t key) { int i; for (i = 0; i < shmalloced; i++) if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) && shmsegs[i].cred != NULL && shmsegs[i].cred->cr_prison == pr && shmsegs[i].u.shm_perm.key == key) return (i); return (-1); } /* * Finds segment either by shmid if is_shmid 
is true, or by segnum if * is_shmid is false. */ static struct shmid_kernel * shm_find_segment(struct prison *rpr, int arg, bool is_shmid) { struct shmid_kernel *shmseg; int segnum; segnum = is_shmid ? IPCID_TO_IX(arg) : arg; if (segnum < 0 || segnum >= shmalloced) return (NULL); shmseg = &shmsegs[segnum]; if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 || (!shm_allow_removed && (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) || (is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)) || shm_prison_cansee(rpr, shmseg) != 0) return (NULL); return (shmseg); } static void shm_deallocate_segment(struct shmid_kernel *shmseg) { vm_size_t size; SYSVSHM_ASSERT_LOCKED(); vm_object_deallocate(shmseg->object); shmseg->object = NULL; size = round_page(shmseg->u.shm_segsz); shm_committed -= btoc(size); shm_nused--; shmseg->u.shm_perm.mode = SHMSEG_FREE; #ifdef MAC mac_sysvshm_cleanup(shmseg); #endif racct_sub_cred(shmseg->cred, RACCT_NSHM, 1); racct_sub_cred(shmseg->cred, RACCT_SHMSIZE, size); crfree(shmseg->cred); shmseg->cred = NULL; } static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s) { struct shmid_kernel *shmseg; int segnum, result; vm_size_t size; SYSVSHM_ASSERT_LOCKED(); segnum = IPCID_TO_IX(shmmap_s->shmid); KASSERT(segnum >= 0 && segnum < shmalloced, ("segnum %d shmalloced %d", segnum, shmalloced)); shmseg = &shmsegs[segnum]; size = round_page(shmseg->u.shm_segsz); result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size); if (result != KERN_SUCCESS) return (EINVAL); shmmap_s->shmid = -1; shmseg->u.shm_dtime = time_second; if (--shmseg->u.shm_nattch == 0 && (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) { shm_deallocate_segment(shmseg); shm_last_free = segnum; } return (0); } static void shm_remove(struct shmid_kernel *shmseg, int segnum) { shmseg->u.shm_perm.key = IPC_PRIVATE; shmseg->u.shm_perm.mode |= SHMSEG_REMOVED; if (shmseg->u.shm_nattch == 0) { shm_deallocate_segment(shmseg); shm_last_free = segnum; } } static 
struct prison * shm_find_prison(struct ucred *cred) { struct prison *pr, *rpr; pr = cred->cr_prison; prison_lock(pr); rpr = osd_jail_get(pr, shm_prison_slot); prison_unlock(pr); return rpr; } static int shm_prison_cansee(struct prison *rpr, struct shmid_kernel *shmseg) { if (shmseg->cred == NULL || !(rpr == shmseg->cred->cr_prison || prison_ischild(rpr, shmseg->cred->cr_prison))) return (EINVAL); return (0); } static int kern_shmdt_locked(struct thread *td, const void *shmaddr) { struct proc *p = td->td_proc; struct shmmap_state *shmmap_s; #ifdef MAC struct shmid_kernel *shmsegptr; int error; #endif int i; SYSVSHM_ASSERT_LOCKED(); if (shm_find_prison(td->td_ucred) == NULL) return (ENOSYS); shmmap_s = p->p_vmspace->vm_shm; if (shmmap_s == NULL) return (EINVAL); for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { if (shmmap_s->shmid != -1 && shmmap_s->va == (vm_offset_t)shmaddr) { break; } } if (i == shminfo.shmseg) return (EINVAL); #ifdef MAC shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)]; error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr); if (error != 0) return (error); #endif return (shm_delete_mapping(p->p_vmspace, shmmap_s)); } #ifndef _SYS_SYSPROTO_H_ struct shmdt_args { const void *shmaddr; }; #endif int sys_shmdt(struct thread *td, struct shmdt_args *uap) { int error; SYSVSHM_LOCK(); error = kern_shmdt_locked(td, uap->shmaddr); SYSVSHM_UNLOCK(); return (error); } static int kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr, int shmflg) { struct prison *rpr; struct proc *p = td->td_proc; struct shmid_kernel *shmseg; struct shmmap_state *shmmap_s; vm_offset_t attach_va; vm_prot_t prot; vm_size_t size; int error, i, rv; SYSVSHM_ASSERT_LOCKED(); rpr = shm_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); shmmap_s = p->p_vmspace->vm_shm; if (shmmap_s == NULL) { shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state), M_SHM, M_WAITOK); for (i = 0; i < shminfo.shmseg; i++) shmmap_s[i].shmid = -1; 
KASSERT(p->p_vmspace->vm_shm == NULL, ("raced")); p->p_vmspace->vm_shm = shmmap_s; } shmseg = shm_find_segment(rpr, shmid, true); if (shmseg == NULL) return (EINVAL); error = ipcperm(td, &shmseg->u.shm_perm, (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W); if (error != 0) return (error); #ifdef MAC error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg); if (error != 0) return (error); #endif for (i = 0; i < shminfo.shmseg; i++) { if (shmmap_s->shmid == -1) break; shmmap_s++; } if (i >= shminfo.shmseg) return (EMFILE); size = round_page(shmseg->u.shm_segsz); prot = VM_PROT_READ; if ((shmflg & SHM_RDONLY) == 0) prot |= VM_PROT_WRITE; if (shmaddr != NULL) { if ((shmflg & SHM_RND) != 0) attach_va = rounddown2((vm_offset_t)shmaddr, SHMLBA); else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) attach_va = (vm_offset_t)shmaddr; else return (EINVAL); } else { /* * This is just a hint to vm_map_find() about where to * put it. */ attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr + lim_max(td, RLIMIT_DATA)); } vm_object_reference(shmseg->object); rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object, 0, &attach_va, size, 0, shmaddr != NULL ? 
VMFS_NO_SPACE : VMFS_OPTIMAL_SPACE, prot, prot, MAP_INHERIT_SHARE | MAP_PREFAULT_PARTIAL); if (rv != KERN_SUCCESS) { vm_object_deallocate(shmseg->object); return (ENOMEM); } shmmap_s->va = attach_va; shmmap_s->shmid = shmid; shmseg->u.shm_lpid = p->p_pid; shmseg->u.shm_atime = time_second; shmseg->u.shm_nattch++; td->td_retval[0] = attach_va; return (error); } int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg) { int error; SYSVSHM_LOCK(); error = kern_shmat_locked(td, shmid, shmaddr, shmflg); SYSVSHM_UNLOCK(); return (error); } #ifndef _SYS_SYSPROTO_H_ struct shmat_args { int shmid; const void *shmaddr; int shmflg; }; #endif int sys_shmat(struct thread *td, struct shmat_args *uap) { return (kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg)); } static int kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz) { struct prison *rpr; struct shmid_kernel *shmseg; struct shmid_ds *shmidp; struct shm_info shm_info; int error; SYSVSHM_ASSERT_LOCKED(); rpr = shm_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); switch (cmd) { /* * It is possible that kern_shmctl is being called from the Linux ABI * layer, in which case, we will need to implement IPC_INFO. It should * be noted that other shmctl calls will be funneled through here for * Linix binaries as well. * * NB: The Linux ABI layer will convert this data to structure(s) more * consistent with the Linux ABI. */ case IPC_INFO: memcpy(buf, &shminfo, sizeof(shminfo)); if (bufsz) *bufsz = sizeof(shminfo); td->td_retval[0] = shmalloced; return (0); case SHM_INFO: { shm_info.used_ids = shm_nused; shm_info.shm_rss = 0; /*XXX where to get from ? */ shm_info.shm_tot = 0; /*XXX where to get from ? */ shm_info.shm_swp = 0; /*XXX where to get from ? */ shm_info.swap_attempts = 0; /*XXX where to get from ? */ shm_info.swap_successes = 0; /*XXX where to get from ? 
*/ memcpy(buf, &shm_info, sizeof(shm_info)); if (bufsz != NULL) *bufsz = sizeof(shm_info); td->td_retval[0] = shmalloced; return (0); } } shmseg = shm_find_segment(rpr, shmid, cmd != SHM_STAT); if (shmseg == NULL) return (EINVAL); #ifdef MAC error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd); if (error != 0) return (error); #endif switch (cmd) { case SHM_STAT: case IPC_STAT: shmidp = (struct shmid_ds *)buf; error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); if (error != 0) return (error); memcpy(shmidp, &shmseg->u, sizeof(struct shmid_ds)); if (td->td_ucred->cr_prison != shmseg->cred->cr_prison) shmidp->shm_perm.key = IPC_PRIVATE; if (bufsz != NULL) *bufsz = sizeof(struct shmid_ds); if (cmd == SHM_STAT) { td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm); } break; case IPC_SET: shmidp = (struct shmid_ds *)buf; error = ipcperm(td, &shmseg->u.shm_perm, IPC_M); if (error != 0) return (error); shmseg->u.shm_perm.uid = shmidp->shm_perm.uid; shmseg->u.shm_perm.gid = shmidp->shm_perm.gid; shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & ~ACCESSPERMS) | (shmidp->shm_perm.mode & ACCESSPERMS); shmseg->u.shm_ctime = time_second; break; case IPC_RMID: error = ipcperm(td, &shmseg->u.shm_perm, IPC_M); if (error != 0) return (error); shm_remove(shmseg, IPCID_TO_IX(shmid)); break; #if 0 case SHM_LOCK: case SHM_UNLOCK: #endif default: error = EINVAL; break; } return (error); } int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, size_t *bufsz) { int error; SYSVSHM_LOCK(); error = kern_shmctl_locked(td, shmid, cmd, buf, bufsz); SYSVSHM_UNLOCK(); return (error); } #ifndef _SYS_SYSPROTO_H_ struct shmctl_args { int shmid; int cmd; struct shmid_ds *buf; }; #endif int sys_shmctl(struct thread *td, struct shmctl_args *uap) { int error; struct shmid_ds buf; size_t bufsz; /* * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support * Linux binaries. 
If we see the call come through the FreeBSD ABI, * return an error back to the user since we do not to support this. */ if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO || uap->cmd == SHM_STAT) return (EINVAL); /* IPC_SET needs to copyin the buffer before calling kern_shmctl */ if (uap->cmd == IPC_SET) { if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds)))) goto done; } error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz); if (error) goto done; /* Cases in which we need to copyout */ switch (uap->cmd) { case IPC_STAT: error = copyout(&buf, uap->buf, bufsz); break; } done: if (error) { /* Invalidate the return value */ td->td_retval[0] = -1; } return (error); } static int shmget_existing(struct thread *td, struct shmget_args *uap, int mode, int segnum) { struct shmid_kernel *shmseg; #ifdef MAC int error; #endif SYSVSHM_ASSERT_LOCKED(); KASSERT(segnum >= 0 && segnum < shmalloced, ("segnum %d shmalloced %d", segnum, shmalloced)); shmseg = &shmsegs[segnum]; if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL)) return (EEXIST); #ifdef MAC error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg); if (error != 0) return (error); #endif if (uap->size != 0 && uap->size > shmseg->u.shm_segsz) return (EINVAL); td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm); return (0); } static int shmget_allocate_segment(struct thread *td, struct shmget_args *uap, int mode) { struct ucred *cred = td->td_ucred; struct shmid_kernel *shmseg; vm_object_t shm_object; int i, segnum; size_t size; SYSVSHM_ASSERT_LOCKED(); if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax) return (EINVAL); if (shm_nused >= shminfo.shmmni) /* Any shmids left? */ return (ENOSPC); size = round_page(uap->size); if (shm_committed + btoc(size) > shminfo.shmall) return (ENOMEM); if (shm_last_free < 0) { shmrealloc(); /* Maybe expand the shmsegs[] array. 
*/ for (i = 0; i < shmalloced; i++) if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE) break; if (i == shmalloced) return (ENOSPC); segnum = i; } else { segnum = shm_last_free; shm_last_free = -1; } KASSERT(segnum >= 0 && segnum < shmalloced, ("segnum %d shmalloced %d", segnum, shmalloced)); shmseg = &shmsegs[segnum]; #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); if (racct_add(td->td_proc, RACCT_NSHM, 1)) { PROC_UNLOCK(td->td_proc); return (ENOSPC); } if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) { racct_sub(td->td_proc, RACCT_NSHM, 1); PROC_UNLOCK(td->td_proc); return (ENOMEM); } PROC_UNLOCK(td->td_proc); } #endif /* * We make sure that we have allocated a pager before we need * to. */ shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0, cred); if (shm_object == NULL) { #ifdef RACCT if (racct_enable) { PROC_LOCK(td->td_proc); racct_sub(td->td_proc, RACCT_NSHM, 1); racct_sub(td->td_proc, RACCT_SHMSIZE, size); PROC_UNLOCK(td->td_proc); } #endif return (ENOMEM); } shm_object->pg_color = 0; VM_OBJECT_WLOCK(shm_object); vm_object_clear_flag(shm_object, OBJ_ONEMAPPING); vm_object_set_flag(shm_object, OBJ_COLORED | OBJ_NOSPLIT); VM_OBJECT_WUNLOCK(shm_object); shmseg->object = shm_object; shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid; shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid; shmseg->u.shm_perm.mode = (mode & ACCESSPERMS) | SHMSEG_ALLOCATED; shmseg->u.shm_perm.key = uap->key; shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff; shmseg->cred = crhold(cred); shmseg->u.shm_segsz = uap->size; shmseg->u.shm_cpid = td->td_proc->p_pid; shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0; shmseg->u.shm_atime = shmseg->u.shm_dtime = 0; #ifdef MAC mac_sysvshm_create(cred, shmseg); #endif shmseg->u.shm_ctime = time_second; shm_committed += btoc(size); shm_nused++; td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm); return (0); } #ifndef _SYS_SYSPROTO_H_ struct shmget_args { 
key_t key; size_t size; int shmflg; }; #endif int sys_shmget(struct thread *td, struct shmget_args *uap) { int segnum, mode; int error; if (shm_find_prison(td->td_ucred) == NULL) return (ENOSYS); mode = uap->shmflg & ACCESSPERMS; SYSVSHM_LOCK(); if (uap->key == IPC_PRIVATE) { error = shmget_allocate_segment(td, uap, mode); } else { segnum = shm_find_segment_by_key(td->td_ucred->cr_prison, uap->key); if (segnum >= 0) error = shmget_existing(td, uap, mode, segnum); else if ((uap->shmflg & IPC_CREAT) == 0) error = ENOENT; else error = shmget_allocate_segment(td, uap, mode); } SYSVSHM_UNLOCK(); return (error); } static void shmfork_myhook(struct proc *p1, struct proc *p2) { struct shmmap_state *shmmap_s; size_t size; int i; SYSVSHM_LOCK(); size = shminfo.shmseg * sizeof(struct shmmap_state); shmmap_s = malloc(size, M_SHM, M_WAITOK); bcopy(p1->p_vmspace->vm_shm, shmmap_s, size); p2->p_vmspace->vm_shm = shmmap_s; for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) { if (shmmap_s->shmid != -1) { KASSERT(IPCID_TO_IX(shmmap_s->shmid) >= 0 && IPCID_TO_IX(shmmap_s->shmid) < shmalloced, ("segnum %d shmalloced %d", IPCID_TO_IX(shmmap_s->shmid), shmalloced)); shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++; } } SYSVSHM_UNLOCK(); } static void shmexit_myhook(struct vmspace *vm) { struct shmmap_state *base, *shm; int i; base = vm->vm_shm; if (base != NULL) { vm->vm_shm = NULL; SYSVSHM_LOCK(); for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) { if (shm->shmid != -1) shm_delete_mapping(vm, shm); } SYSVSHM_UNLOCK(); free(base, M_SHM); } } static void shmrealloc(void) { struct shmid_kernel *newsegs; int i; SYSVSHM_ASSERT_LOCKED(); if (shmalloced >= shminfo.shmmni) return; newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK); for (i = 0; i < shmalloced; i++) bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0])); for (; i < shminfo.shmmni; i++) { newsegs[i].u.shm_perm.mode = SHMSEG_FREE; newsegs[i].u.shm_perm.seq = 0; #ifdef MAC mac_sysvshm_init(&newsegs[i]); 
#endif } free(shmsegs, M_SHM); shmsegs = newsegs; shmalloced = shminfo.shmmni; } static struct syscall_helper_data shm_syscalls[] = { SYSCALL_INIT_HELPER(shmat), SYSCALL_INIT_HELPER(shmctl), SYSCALL_INIT_HELPER(shmdt), SYSCALL_INIT_HELPER(shmget), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL_INIT_HELPER_COMPAT(freebsd7_shmctl), #endif #if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43)) SYSCALL_INIT_HELPER(shmsys), #endif SYSCALL_INIT_LAST }; #ifdef COMPAT_FREEBSD32 #include #include #include #include #include #include static struct syscall_helper_data shm32_syscalls[] = { SYSCALL32_INIT_HELPER_COMPAT(shmat), SYSCALL32_INIT_HELPER_COMPAT(shmdt), SYSCALL32_INIT_HELPER_COMPAT(shmget), SYSCALL32_INIT_HELPER(freebsd32_shmsys), SYSCALL32_INIT_HELPER(freebsd32_shmctl), #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) SYSCALL32_INIT_HELPER(freebsd7_freebsd32_shmctl), #endif SYSCALL_INIT_LAST }; #endif static int shminit(void) { struct prison *pr; void **rsv; int i, error; osd_method_t methods[PR_MAXMETHOD] = { [PR_METHOD_CHECK] = shm_prison_check, [PR_METHOD_SET] = shm_prison_set, [PR_METHOD_GET] = shm_prison_get, [PR_METHOD_REMOVE] = shm_prison_remove, }; #ifndef BURN_BRIDGES if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0) printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n"); #endif if (shminfo.shmmax == SHMMAX) { /* Initialize shmmax dealing with possible overflow. 
*/ for (i = PAGE_SIZE; i != 0; i--) { shminfo.shmmax = shminfo.shmall * i; if ((shminfo.shmmax / shminfo.shmall) == (u_long)i) break; } } shmalloced = shminfo.shmmni; shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK); for (i = 0; i < shmalloced; i++) { shmsegs[i].u.shm_perm.mode = SHMSEG_FREE; shmsegs[i].u.shm_perm.seq = 0; #ifdef MAC mac_sysvshm_init(&shmsegs[i]); #endif } shm_last_free = 0; shm_nused = 0; shm_committed = 0; sx_init(&sysvshmsx, "sysvshmsx"); shmexit_hook = &shmexit_myhook; shmfork_hook = &shmfork_myhook; /* Set current prisons according to their allow.sysvipc. */ shm_prison_slot = osd_jail_register(NULL, methods); rsv = osd_reserve(shm_prison_slot); prison_lock(&prison0); (void)osd_jail_set_reserved(&prison0, shm_prison_slot, rsv, &prison0); prison_unlock(&prison0); rsv = NULL; sx_slock(&allprison_lock); TAILQ_FOREACH(pr, &allprison, pr_list) { if (rsv == NULL) rsv = osd_reserve(shm_prison_slot); prison_lock(pr); if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) { (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv, &prison0); rsv = NULL; } prison_unlock(pr); } if (rsv != NULL) osd_free_reserved(rsv); sx_sunlock(&allprison_lock); error = syscall_helper_register(shm_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #ifdef COMPAT_FREEBSD32 error = syscall32_helper_register(shm32_syscalls, SY_THR_STATIC_KLD); if (error != 0) return (error); #endif return (0); } static int shmunload(void) { int i; if (shm_nused > 0) return (EBUSY); #ifdef COMPAT_FREEBSD32 syscall32_helper_unregister(shm32_syscalls); #endif syscall_helper_unregister(shm_syscalls); if (shm_prison_slot != 0) osd_jail_deregister(shm_prison_slot); for (i = 0; i < shmalloced; i++) { #ifdef MAC mac_sysvshm_destroy(&shmsegs[i]); #endif /* * Objects might be still mapped into the processes * address spaces. Actual free would happen on the * last mapping destruction. 
*/ if (shmsegs[i].u.shm_perm.mode != SHMSEG_FREE) vm_object_deallocate(shmsegs[i].object); } free(shmsegs, M_SHM); shmexit_hook = NULL; shmfork_hook = NULL; sx_destroy(&sysvshmsx); return (0); } static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS) { struct shmid_kernel tshmseg; struct prison *pr, *rpr; int error, i; SYSVSHM_LOCK(); pr = req->td->td_ucred->cr_prison; rpr = shm_find_prison(req->td->td_ucred); error = 0; for (i = 0; i < shmalloced; i++) { if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 || rpr == NULL || shm_prison_cansee(rpr, &shmsegs[i]) != 0) { bzero(&tshmseg, sizeof(tshmseg)); tshmseg.u.shm_perm.mode = SHMSEG_FREE; } else { tshmseg = shmsegs[i]; if (tshmseg.cred->cr_prison != pr) tshmseg.u.shm_perm.key = IPC_PRIVATE; } error = SYSCTL_OUT(req, &tshmseg, sizeof(tshmseg)); if (error != 0) break; } SYSVSHM_UNLOCK(); return (error); } static int shm_prison_check(void *obj, void *data) { struct prison *pr = obj; struct prison *prpr; struct vfsoptlist *opts = data; int error, jsys; /* * sysvshm is a jailsys integer. * It must be "disable" if the parent jail is disabled. */ error = vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)); if (error != ENOENT) { if (error != 0) return (error); switch (jsys) { case JAIL_SYS_DISABLE: break; case JAIL_SYS_NEW: case JAIL_SYS_INHERIT: prison_lock(pr->pr_parent); prpr = osd_jail_get(pr->pr_parent, shm_prison_slot); prison_unlock(pr->pr_parent); if (prpr == NULL) return (EPERM); break; default: return (EINVAL); } } return (0); } static int shm_prison_set(void *obj, void *data) { struct prison *pr = obj; struct prison *tpr, *orpr, *nrpr, *trpr; struct vfsoptlist *opts = data; void *rsv; int jsys, descend; /* * sysvshm controls which jail is the root of the associated segments * (this jail or same as the parent), or if the feature is available * at all. */ if (vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)) == ENOENT) jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0) ? 
JAIL_SYS_INHERIT : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0) ? JAIL_SYS_DISABLE : -1; if (jsys == JAIL_SYS_DISABLE) { prison_lock(pr); orpr = osd_jail_get(pr, shm_prison_slot); if (orpr != NULL) osd_jail_del(pr, shm_prison_slot); prison_unlock(pr); if (orpr != NULL) { if (orpr == pr) shm_prison_cleanup(pr); /* Disable all child jails as well. */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, shm_prison_slot); if (trpr != NULL) { osd_jail_del(tpr, shm_prison_slot); prison_unlock(tpr); if (trpr == tpr) shm_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } else if (jsys != -1) { if (jsys == JAIL_SYS_NEW) nrpr = pr; else { prison_lock(pr->pr_parent); nrpr = osd_jail_get(pr->pr_parent, shm_prison_slot); prison_unlock(pr->pr_parent); } rsv = osd_reserve(shm_prison_slot); prison_lock(pr); orpr = osd_jail_get(pr, shm_prison_slot); if (orpr != nrpr) (void)osd_jail_set_reserved(pr, shm_prison_slot, rsv, nrpr); else osd_free_reserved(rsv); prison_unlock(pr); if (orpr != nrpr) { if (orpr == pr) shm_prison_cleanup(pr); if (orpr != NULL) { /* Change child jails matching the old root, */ FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { prison_lock(tpr); trpr = osd_jail_get(tpr, shm_prison_slot); if (trpr == orpr) { (void)osd_jail_set(tpr, shm_prison_slot, nrpr); prison_unlock(tpr); if (trpr == tpr) shm_prison_cleanup(tpr); } else { prison_unlock(tpr); descend = 0; } } } } } return (0); } static int shm_prison_get(void *obj, void *data) { struct prison *pr = obj; struct prison *rpr; struct vfsoptlist *opts = data; int error, jsys; /* Set sysvshm based on the jail's root prison. */ prison_lock(pr); rpr = osd_jail_get(pr, shm_prison_slot); prison_unlock(pr); jsys = rpr == NULL ? JAIL_SYS_DISABLE : rpr == pr ? 
JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "sysvshm", &jsys, sizeof(jsys)); if (error == ENOENT) error = 0; return (error); } static int shm_prison_remove(void *obj, void *data __unused) { struct prison *pr = obj; struct prison *rpr; SYSVSHM_LOCK(); prison_lock(pr); rpr = osd_jail_get(pr, shm_prison_slot); prison_unlock(pr); if (rpr == pr) shm_prison_cleanup(pr); SYSVSHM_UNLOCK(); return (0); } static void shm_prison_cleanup(struct prison *pr) { struct shmid_kernel *shmseg; int i; /* Remove any segments that belong to this jail. */ for (i = 0; i < shmalloced; i++) { shmseg = &shmsegs[i]; if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) && shmseg->cred != NULL && shmseg->cred->cr_prison == pr) { shm_remove(shmseg, i); } } } SYSCTL_JAIL_PARAM_SYS_NODE(sysvshm, CTLFLAG_RW, "SYSV shared memory"); #if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43)) struct oshmid_ds { struct ipc_perm_old shm_perm; /* operation perms */ int shm_segsz; /* size of segment (bytes) */ u_short shm_cpid; /* pid, creator */ u_short shm_lpid; /* pid, last operation */ short shm_nattch; /* no. 
of current attaches */ time_t shm_atime; /* last attach time */ time_t shm_dtime; /* last detach time */ time_t shm_ctime; /* last change time */ void *shm_handle; /* internal handle for shm segment */ }; struct oshmctl_args { int shmid; int cmd; struct oshmid_ds *ubuf; }; static int oshmctl(struct thread *td, struct oshmctl_args *uap) { #ifdef COMPAT_43 int error = 0; struct prison *rpr; struct shmid_kernel *shmseg; struct oshmid_ds outbuf; rpr = shm_find_prison(td->td_ucred); if (rpr == NULL) return (ENOSYS); if (uap->cmd != IPC_STAT) { return (freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap)); } SYSVSHM_LOCK(); shmseg = shm_find_segment(rpr, uap->shmid, true); if (shmseg == NULL) { SYSVSHM_UNLOCK(); return (EINVAL); } error = ipcperm(td, &shmseg->u.shm_perm, IPC_R); if (error != 0) { SYSVSHM_UNLOCK(); return (error); } #ifdef MAC error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd); if (error != 0) { SYSVSHM_UNLOCK(); return (error); } #endif ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm); outbuf.shm_segsz = shmseg->u.shm_segsz; outbuf.shm_cpid = shmseg->u.shm_cpid; outbuf.shm_lpid = shmseg->u.shm_lpid; outbuf.shm_nattch = shmseg->u.shm_nattch; outbuf.shm_atime = shmseg->u.shm_atime; outbuf.shm_dtime = shmseg->u.shm_dtime; outbuf.shm_ctime = shmseg->u.shm_ctime; outbuf.shm_handle = shmseg->object; SYSVSHM_UNLOCK(); return (copyout(&outbuf, uap->ubuf, sizeof(outbuf))); #else return (EINVAL); #endif } /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *shmcalls[] = { (sy_call_t *)sys_shmat, (sy_call_t *)oshmctl, (sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget, (sy_call_t *)freebsd7_shmctl }; #ifndef _SYS_SYSPROTO_H_ /* XXX actually varargs. 
*/ struct shmsys_args { int which; int a2; int a3; int a4; }; #endif int sys_shmsys(struct thread *td, struct shmsys_args *uap) { + AUDIT_ARG_SVIPC_WHICH(uap->which); if (uap->which < 0 || uap->which >= nitems(shmcalls)) return (EINVAL); return ((*shmcalls[uap->which])(td, &uap->a2)); } #endif /* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */ #ifdef COMPAT_FREEBSD32 int freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap) { #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) + AUDIT_ARG_SVIPC_WHICH(uap->which); switch (uap->which) { case 0: { /* shmat */ struct shmat_args ap; ap.shmid = uap->a2; ap.shmaddr = PTRIN(uap->a3); ap.shmflg = uap->a4; return (sysent[SYS_shmat].sy_call(td, &ap)); } case 2: { /* shmdt */ struct shmdt_args ap; ap.shmaddr = PTRIN(uap->a2); return (sysent[SYS_shmdt].sy_call(td, &ap)); } case 3: { /* shmget */ struct shmget_args ap; ap.key = uap->a2; ap.size = uap->a3; ap.shmflg = uap->a4; return (sysent[SYS_shmget].sy_call(td, &ap)); } case 4: { /* shmctl */ struct freebsd7_freebsd32_shmctl_args ap; ap.shmid = uap->a2; ap.cmd = uap->a3; ap.buf = PTRIN(uap->a4); return (freebsd7_freebsd32_shmctl(td, &ap)); } case 1: /* oshmctl */ default: return (EINVAL); } #else return (nosys(td, NULL)); #endif } #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) int freebsd7_freebsd32_shmctl(struct thread *td, struct freebsd7_freebsd32_shmctl_args *uap) { int error; union { struct shmid_ds shmid_ds; struct shm_info shm_info; struct shminfo shminfo; } u; union { struct shmid_ds32_old shmid_ds32; struct shm_info32 shm_info32; struct shminfo32 shminfo32; } u32; size_t sz; if (uap->cmd == IPC_SET) { if ((error = copyin(uap->buf, &u32.shmid_ds32, sizeof(u32.shmid_ds32)))) goto done; freebsd32_ipcperm_old_in(&u32.shmid_ds32.shm_perm, &u.shmid_ds.shm_perm); CP(u32.shmid_ds32, u.shmid_ds, shm_segsz); CP(u32.shmid_ds32, 
u.shmid_ds, shm_lpid); CP(u32.shmid_ds32, u.shmid_ds, shm_cpid); CP(u32.shmid_ds32, u.shmid_ds, shm_nattch); CP(u32.shmid_ds32, u.shmid_ds, shm_atime); CP(u32.shmid_ds32, u.shmid_ds, shm_dtime); CP(u32.shmid_ds32, u.shmid_ds, shm_ctime); } error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz); if (error) goto done; /* Cases in which we need to copyout */ switch (uap->cmd) { case IPC_INFO: CP(u.shminfo, u32.shminfo32, shmmax); CP(u.shminfo, u32.shminfo32, shmmin); CP(u.shminfo, u32.shminfo32, shmmni); CP(u.shminfo, u32.shminfo32, shmseg); CP(u.shminfo, u32.shminfo32, shmall); error = copyout(&u32.shminfo32, uap->buf, sizeof(u32.shminfo32)); break; case SHM_INFO: CP(u.shm_info, u32.shm_info32, used_ids); CP(u.shm_info, u32.shm_info32, shm_rss); CP(u.shm_info, u32.shm_info32, shm_tot); CP(u.shm_info, u32.shm_info32, shm_swp); CP(u.shm_info, u32.shm_info32, swap_attempts); CP(u.shm_info, u32.shm_info32, swap_successes); error = copyout(&u32.shm_info32, uap->buf, sizeof(u32.shm_info32)); break; case SHM_STAT: case IPC_STAT: freebsd32_ipcperm_old_out(&u.shmid_ds.shm_perm, &u32.shmid_ds32.shm_perm); if (u.shmid_ds.shm_segsz > INT32_MAX) u32.shmid_ds32.shm_segsz = INT32_MAX; else CP(u.shmid_ds, u32.shmid_ds32, shm_segsz); CP(u.shmid_ds, u32.shmid_ds32, shm_lpid); CP(u.shmid_ds, u32.shmid_ds32, shm_cpid); CP(u.shmid_ds, u32.shmid_ds32, shm_nattch); CP(u.shmid_ds, u32.shmid_ds32, shm_atime); CP(u.shmid_ds, u32.shmid_ds32, shm_dtime); CP(u.shmid_ds, u32.shmid_ds32, shm_ctime); u32.shmid_ds32.shm_internal = 0; error = copyout(&u32.shmid_ds32, uap->buf, sizeof(u32.shmid_ds32)); break; } done: if (error) { /* Invalidate the return value */ td->td_retval[0] = -1; } return (error); } #endif int freebsd32_shmctl(struct thread *td, struct freebsd32_shmctl_args *uap) { int error; union { struct shmid_ds shmid_ds; struct shm_info shm_info; struct shminfo shminfo; } u; union { struct shmid_ds32 shmid_ds32; struct shm_info32 shm_info32; struct shminfo32 shminfo32; } u32; size_t 
sz; if (uap->cmd == IPC_SET) { if ((error = copyin(uap->buf, &u32.shmid_ds32, sizeof(u32.shmid_ds32)))) goto done; freebsd32_ipcperm_in(&u32.shmid_ds32.shm_perm, &u.shmid_ds.shm_perm); CP(u32.shmid_ds32, u.shmid_ds, shm_segsz); CP(u32.shmid_ds32, u.shmid_ds, shm_lpid); CP(u32.shmid_ds32, u.shmid_ds, shm_cpid); CP(u32.shmid_ds32, u.shmid_ds, shm_nattch); CP(u32.shmid_ds32, u.shmid_ds, shm_atime); CP(u32.shmid_ds32, u.shmid_ds, shm_dtime); CP(u32.shmid_ds32, u.shmid_ds, shm_ctime); } error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz); if (error) goto done; /* Cases in which we need to copyout */ switch (uap->cmd) { case IPC_INFO: CP(u.shminfo, u32.shminfo32, shmmax); CP(u.shminfo, u32.shminfo32, shmmin); CP(u.shminfo, u32.shminfo32, shmmni); CP(u.shminfo, u32.shminfo32, shmseg); CP(u.shminfo, u32.shminfo32, shmall); error = copyout(&u32.shminfo32, uap->buf, sizeof(u32.shminfo32)); break; case SHM_INFO: CP(u.shm_info, u32.shm_info32, used_ids); CP(u.shm_info, u32.shm_info32, shm_rss); CP(u.shm_info, u32.shm_info32, shm_tot); CP(u.shm_info, u32.shm_info32, shm_swp); CP(u.shm_info, u32.shm_info32, swap_attempts); CP(u.shm_info, u32.shm_info32, swap_successes); error = copyout(&u32.shm_info32, uap->buf, sizeof(u32.shm_info32)); break; case SHM_STAT: case IPC_STAT: freebsd32_ipcperm_out(&u.shmid_ds.shm_perm, &u32.shmid_ds32.shm_perm); if (u.shmid_ds.shm_segsz > INT32_MAX) u32.shmid_ds32.shm_segsz = INT32_MAX; else CP(u.shmid_ds, u32.shmid_ds32, shm_segsz); CP(u.shmid_ds, u32.shmid_ds32, shm_lpid); CP(u.shmid_ds, u32.shmid_ds32, shm_cpid); CP(u.shmid_ds, u32.shmid_ds32, shm_nattch); CP(u.shmid_ds, u32.shmid_ds32, shm_atime); CP(u.shmid_ds, u32.shmid_ds32, shm_dtime); CP(u.shmid_ds, u32.shmid_ds32, shm_ctime); error = copyout(&u32.shmid_ds32, uap->buf, sizeof(u32.shmid_ds32)); break; } done: if (error) { /* Invalidate the return value */ td->td_retval[0] = -1; } return (error); } #endif #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ 
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) #ifndef CP #define CP(src, dst, fld) do { (dst).fld = (src).fld; } while (0) #endif #ifndef _SYS_SYSPROTO_H_ struct freebsd7_shmctl_args { int shmid; int cmd; struct shmid_ds_old *buf; }; #endif int freebsd7_shmctl(struct thread *td, struct freebsd7_shmctl_args *uap) { int error; struct shmid_ds_old old; struct shmid_ds buf; size_t bufsz; /* * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support * Linux binaries. If we see the call come through the FreeBSD ABI, * return an error back to the user since we do not to support this. */ if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO || uap->cmd == SHM_STAT) return (EINVAL); /* IPC_SET needs to copyin the buffer before calling kern_shmctl */ if (uap->cmd == IPC_SET) { if ((error = copyin(uap->buf, &old, sizeof(old)))) goto done; ipcperm_old2new(&old.shm_perm, &buf.shm_perm); CP(old, buf, shm_segsz); CP(old, buf, shm_lpid); CP(old, buf, shm_cpid); CP(old, buf, shm_nattch); CP(old, buf, shm_atime); CP(old, buf, shm_dtime); CP(old, buf, shm_ctime); } error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz); if (error) goto done; /* Cases in which we need to copyout */ switch (uap->cmd) { case IPC_STAT: ipcperm_new2old(&buf.shm_perm, &old.shm_perm); if (buf.shm_segsz > INT_MAX) old.shm_segsz = INT_MAX; else CP(buf, old, shm_segsz); CP(buf, old, shm_lpid); CP(buf, old, shm_cpid); if (buf.shm_nattch > SHRT_MAX) old.shm_nattch = SHRT_MAX; else CP(buf, old, shm_nattch); CP(buf, old, shm_atime); CP(buf, old, shm_dtime); CP(buf, old, shm_ctime); old.shm_internal = NULL; error = copyout(&old, uap->buf, sizeof(old)); break; } done: if (error) { /* Invalidate the return value */ td->td_retval[0] = -1; } return (error); } #endif /* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 || COMPAT_FREEBSD7 */ static int sysvshm_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: error = shminit(); if (error != 0) 
shmunload(); break; case MOD_UNLOAD: error = shmunload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } static moduledata_t sysvshm_mod = { "sysvshm", &sysvshm_modload, NULL }; DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST); MODULE_VERSION(sysvshm, 1); Index: head/sys/security/audit/audit.c =================================================================== --- head/sys/security/audit/audit.c (revision 316184) +++ head/sys/security/audit/audit.c (revision 316185) @@ -1,791 +1,809 @@ /*- * Copyright (c) 1999-2005 Apple Inc. - * Copyright (c) 2006-2007, 2016 Robert N. M. Watson + * Copyright (c) 2006-2007, 2016-2017 Robert N. M. Watson * All rights reserved. * * Portions of this software were developed by BAE Systems, the University of * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent * Computing (TC) research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(audit, "BSM audit support"); static uma_zone_t audit_record_zone; static MALLOC_DEFINE(M_AUDITCRED, "audit_cred", "Audit cred storage"); MALLOC_DEFINE(M_AUDITDATA, "audit_data", "Audit data storage"); MALLOC_DEFINE(M_AUDITPATH, "audit_path", "Audit path storage"); MALLOC_DEFINE(M_AUDITTEXT, "audit_text", "Audit text storage"); MALLOC_DEFINE(M_AUDITGIDSET, "audit_gidset", "Audit GID set storage"); static SYSCTL_NODE(_security, OID_AUTO, audit, CTLFLAG_RW, 0, "TrustedBSD audit controls"); /* * Audit control settings that are set/read by system calls and are hence * non-static. * * Define the audit control flags. */ int audit_enabled; int audit_suspended; /* * Flags controlling behavior in low storage situations. Should we panic if * a write fails? Should we fail stop if we're out of disk space? */ int audit_panic_on_write_fail; int audit_fail_stop; int audit_argv; int audit_arge; /* * Are we currently "failing stop" due to out of disk space? */ int audit_in_failure; /* * Global audit statistics. */ struct audit_fstat audit_fstat; /* * Preselection mask for non-attributable events. 
*/ struct au_mask audit_nae_mask; /* * Mutex to protect global variables shared between various threads and * processes. */ struct mtx audit_mtx; /* * Queue of audit records ready for delivery to disk. We insert new records * at the tail, and remove records from the head. Also, a count of the * number of records used for checking queue depth. In addition, a counter * of records that we have allocated but are not yet in the queue, which is * needed to estimate the total size of the combined set of records * outstanding in the system. */ struct kaudit_queue audit_q; int audit_q_len; int audit_pre_q_len; /* * Audit queue control settings (minimum free, low/high water marks, etc.) */ struct au_qctrl audit_qctrl; /* * Condition variable to signal to the worker that it has work to do: either * new records are in the queue, or a log replacement is taking place. */ struct cv audit_worker_cv; /* * Condition variable to flag when crossing the low watermark, meaning that * threads blocked due to hitting the high watermark can wake up and continue * to commit records. */ struct cv audit_watermark_cv; /* * Condition variable for auditing threads wait on when in fail-stop mode. * Threads wait on this CV forever (and ever), never seeing the light of day * again. */ static struct cv audit_fail_cv; /* * Optional DTrace audit provider support: function pointers for preselection * and commit events. */ #ifdef KDTRACE_HOOKS void *(*dtaudit_hook_preselect)(au_id_t auid, au_event_t event, au_class_t class); int (*dtaudit_hook_commit)(struct kaudit_record *kar, au_id_t auid, au_event_t event, au_class_t class, int sorf); void (*dtaudit_hook_bsm)(struct kaudit_record *kar, au_id_t auid, au_event_t event, au_class_t class, int sorf, void *bsm_data, size_t bsm_lenlen); #endif /* * Kernel audit information. This will store the current audit address * or host information that the kernel will use when it's generating * audit records. 
This data is modified by the A_GET{SET}KAUDIT auditon(2) * command. */ static struct auditinfo_addr audit_kinfo; static struct rwlock audit_kinfo_lock; #define KINFO_LOCK_INIT() rw_init(&audit_kinfo_lock, \ "audit_kinfo_lock") #define KINFO_RLOCK() rw_rlock(&audit_kinfo_lock) #define KINFO_WLOCK() rw_wlock(&audit_kinfo_lock) #define KINFO_RUNLOCK() rw_runlock(&audit_kinfo_lock) #define KINFO_WUNLOCK() rw_wunlock(&audit_kinfo_lock) void audit_set_kinfo(struct auditinfo_addr *ak) { KASSERT(ak->ai_termid.at_type == AU_IPv4 || ak->ai_termid.at_type == AU_IPv6, ("audit_set_kinfo: invalid address type")); KINFO_WLOCK(); audit_kinfo = *ak; KINFO_WUNLOCK(); } void audit_get_kinfo(struct auditinfo_addr *ak) { KASSERT(audit_kinfo.ai_termid.at_type == AU_IPv4 || audit_kinfo.ai_termid.at_type == AU_IPv6, ("audit_set_kinfo: invalid address type")); KINFO_RLOCK(); *ak = audit_kinfo; KINFO_RUNLOCK(); } /* * Construct an audit record for the passed thread. */ static int audit_record_ctor(void *mem, int size, void *arg, int flags) { struct kaudit_record *ar; struct thread *td; struct ucred *cred; struct prison *pr; KASSERT(sizeof(*ar) == size, ("audit_record_ctor: wrong size")); td = arg; ar = mem; bzero(ar, sizeof(*ar)); ar->k_ar.ar_magic = AUDIT_RECORD_MAGIC; nanotime(&ar->k_ar.ar_starttime); /* * Export the subject credential. */ cred = td->td_ucred; cru2x(cred, &ar->k_ar.ar_subj_cred); ar->k_ar.ar_subj_ruid = cred->cr_ruid; ar->k_ar.ar_subj_rgid = cred->cr_rgid; ar->k_ar.ar_subj_egid = cred->cr_groups[0]; ar->k_ar.ar_subj_auid = cred->cr_audit.ai_auid; ar->k_ar.ar_subj_asid = cred->cr_audit.ai_asid; ar->k_ar.ar_subj_pid = td->td_proc->p_pid; ar->k_ar.ar_subj_amask = cred->cr_audit.ai_mask; ar->k_ar.ar_subj_term_addr = cred->cr_audit.ai_termid; /* * If this process is jailed, make sure we capture the name of the * jail so we can use it to generate a zonename token when we covert * this record to BSM. 
*/
	if (jailed(cred)) {
		pr = cred->cr_prison;
		(void) strlcpy(ar->k_ar.ar_jailname, pr->pr_name,
		    sizeof(ar->k_ar.ar_jailname));
	} else
		ar->k_ar.ar_jailname[0] = '\0';
	return (0);
}

/*
 * Destroy an audit record: release every separately allocated buffer that
 * hangs off the record (paths, text, user data, argv/envv strings and the
 * group set).  Each pointer is freed only if non-NULL, using the malloc type
 * named alongside it below.  The record memory itself is not freed here.
 */
static void
audit_record_dtor(void *mem, int size, void *arg)
{
	struct kaudit_record *ar;

	KASSERT(sizeof(*ar) == size, ("audit_record_dtor: wrong size"));

	ar = mem;
	if (ar->k_ar.ar_arg_upath1 != NULL)
		free(ar->k_ar.ar_arg_upath1, M_AUDITPATH);
	if (ar->k_ar.ar_arg_upath2 != NULL)
		free(ar->k_ar.ar_arg_upath2, M_AUDITPATH);
	if (ar->k_ar.ar_arg_text != NULL)
		free(ar->k_ar.ar_arg_text, M_AUDITTEXT);
	if (ar->k_udata != NULL)
		free(ar->k_udata, M_AUDITDATA);
	if (ar->k_ar.ar_arg_argv != NULL)
		free(ar->k_ar.ar_arg_argv, M_AUDITTEXT);
	if (ar->k_ar.ar_arg_envv != NULL)
		free(ar->k_ar.ar_arg_envv, M_AUDITTEXT);
	if (ar->k_ar.ar_arg_groups.gidset != NULL)
		free(ar->k_ar.ar_arg_groups.gidset, M_AUDITGIDSET);
}

/*
 * Initialize the Audit subsystem: configuration state, work queue,
 * synchronization primitives, worker thread, and trigger device node.  Also
 * call into the BSM assembly code to initialize it.
 */
static void
audit_init(void)
{

	audit_enabled = 0;
	audit_suspended = 0;
	audit_panic_on_write_fail = 0;
	audit_fail_stop = 0;
	audit_in_failure = 0;
	audit_argv = 0;
	audit_arge = 0;

	audit_fstat.af_filesz = 0;	/* '0' means unset, unbounded.
*/ audit_fstat.af_currsz = 0; audit_nae_mask.am_success = 0; audit_nae_mask.am_failure = 0; TAILQ_INIT(&audit_q); audit_q_len = 0; audit_pre_q_len = 0; audit_qctrl.aq_hiwater = AQ_HIWATER; audit_qctrl.aq_lowater = AQ_LOWATER; audit_qctrl.aq_bufsz = AQ_BUFSZ; audit_qctrl.aq_minfree = AU_FS_MINFREE; audit_kinfo.ai_termid.at_type = AU_IPv4; audit_kinfo.ai_termid.at_addr[0] = INADDR_ANY; mtx_init(&audit_mtx, "audit_mtx", NULL, MTX_DEF); KINFO_LOCK_INIT(); cv_init(&audit_worker_cv, "audit_worker_cv"); cv_init(&audit_watermark_cv, "audit_watermark_cv"); cv_init(&audit_fail_cv, "audit_fail_cv"); audit_record_zone = uma_zcreate("audit_record", sizeof(struct kaudit_record), audit_record_ctor, audit_record_dtor, NULL, NULL, UMA_ALIGN_PTR, 0); /* Initialize the BSM audit subsystem. */ kau_init(); audit_trigger_init(); /* Register shutdown handler. */ EVENTHANDLER_REGISTER(shutdown_pre_sync, audit_shutdown, NULL, SHUTDOWN_PRI_FIRST); /* Start audit worker thread. */ audit_worker_init(); } SYSINIT(audit_init, SI_SUB_AUDIT, SI_ORDER_FIRST, audit_init, NULL); /* * Drain the audit queue and close the log at shutdown. Note that this can * be called both from the system shutdown path and also from audit * configuration syscalls, so 'arg' and 'howto' are ignored. * * XXXRW: In FreeBSD 7.x and 8.x, this fails to wait for the record queue to * drain before returning, which could lead to lost records on shutdown. */ void audit_shutdown(void *arg, int howto) { audit_rotate_vnode(NULL, NULL); } /* * Return the current thread's audit record, if any. */ struct kaudit_record * currecord(void) { return (curthread->td_ar); } /* * XXXAUDIT: There are a number of races present in the code below due to * release and re-grab of the mutex. The code should be revised to become * slightly less racy. * * XXXAUDIT: Shouldn't there be logic here to sleep waiting on available * pre_q space, suspending the system call until there is room? 
*/ struct kaudit_record * audit_new(int event, struct thread *td) { struct kaudit_record *ar; int no_record; mtx_lock(&audit_mtx); no_record = (audit_suspended || !audit_enabled); mtx_unlock(&audit_mtx); if (no_record) return (NULL); /* * Note: the number of outstanding uncommitted audit records is * limited to the number of concurrent threads servicing system calls * in the kernel. */ ar = uma_zalloc_arg(audit_record_zone, td, M_WAITOK); ar->k_ar.ar_event = event; mtx_lock(&audit_mtx); audit_pre_q_len++; mtx_unlock(&audit_mtx); return (ar); } void audit_free(struct kaudit_record *ar) { uma_zfree(audit_record_zone, ar); } void audit_commit(struct kaudit_record *ar, int error, int retval) { au_event_t event; au_class_t class; au_id_t auid; int sorf; struct au_mask *aumask; if (ar == NULL) return; ar->k_ar.ar_errno = error; ar->k_ar.ar_retval = retval; nanotime(&ar->k_ar.ar_endtime); /* * Decide whether to commit the audit record by checking the error * value from the system call and using the appropriate audit mask. */ if (ar->k_ar.ar_subj_auid == AU_DEFAUDITID) aumask = &audit_nae_mask; else aumask = &ar->k_ar.ar_subj_amask; if (error) sorf = AU_PRS_FAILURE; else sorf = AU_PRS_SUCCESS; /* * syscalls.master sometimes contains a prototype event number, which * we will transform into a more specific event number now that we * have more complete information gathered during the system call. */ switch(ar->k_ar.ar_event) { case AUE_OPEN_RWTC: ar->k_ar.ar_event = audit_flags_and_error_to_openevent( ar->k_ar.ar_arg_fflags, error); break; case AUE_OPENAT_RWTC: ar->k_ar.ar_event = audit_flags_and_error_to_openatevent( ar->k_ar.ar_arg_fflags, error); break; case AUE_SYSCTL: ar->k_ar.ar_event = audit_ctlname_to_sysctlevent( ar->k_ar.ar_arg_ctlname, ar->k_ar.ar_valid_arg); break; case AUE_AUDITON: /* Convert the auditon() command to an event. 
*/ ar->k_ar.ar_event = auditon_command_event(ar->k_ar.ar_arg_cmd); + break; + + case AUE_MSGSYS: + if (ARG_IS_VALID(ar, ARG_SVIPC_WHICH)) + ar->k_ar.ar_event = + audit_msgsys_to_event(ar->k_ar.ar_arg_svipc_which); + break; + + case AUE_SEMSYS: + if (ARG_IS_VALID(ar, ARG_SVIPC_WHICH)) + ar->k_ar.ar_event = + audit_semsys_to_event(ar->k_ar.ar_arg_svipc_which); + break; + + case AUE_SHMSYS: + if (ARG_IS_VALID(ar, ARG_SVIPC_WHICH)) + ar->k_ar.ar_event = + audit_shmsys_to_event(ar->k_ar.ar_arg_svipc_which); break; } auid = ar->k_ar.ar_subj_auid; event = ar->k_ar.ar_event; class = au_event_class(event); ar->k_ar_commit |= AR_COMMIT_KERNEL; if (au_preselect(event, class, aumask, sorf) != 0) ar->k_ar_commit |= AR_PRESELECT_TRAIL; if (audit_pipe_preselect(auid, event, class, sorf, ar->k_ar_commit & AR_PRESELECT_TRAIL) != 0) ar->k_ar_commit |= AR_PRESELECT_PIPE; #ifdef KDTRACE_HOOKS /* * Expose the audit record to DTrace, both to allow the "commit" probe * to fire if it's desirable, and also to allow a decision to be made * about later firing with BSM in the audit worker. */ if (dtaudit_hook_commit != NULL) { if (dtaudit_hook_commit(ar, auid, event, class, sorf) != 0) ar->k_ar_commit |= AR_PRESELECT_DTRACE; } #endif if ((ar->k_ar_commit & (AR_PRESELECT_TRAIL | AR_PRESELECT_PIPE | AR_PRESELECT_USER_TRAIL | AR_PRESELECT_USER_PIPE | AR_PRESELECT_DTRACE)) == 0) { mtx_lock(&audit_mtx); audit_pre_q_len--; mtx_unlock(&audit_mtx); audit_free(ar); return; } /* * Note: it could be that some records initiated while audit was * enabled should still be committed? */ mtx_lock(&audit_mtx); if (audit_suspended || !audit_enabled) { audit_pre_q_len--; mtx_unlock(&audit_mtx); audit_free(ar); return; } /* * Constrain the number of committed audit records based on the * configurable parameter. 
*/ while (audit_q_len >= audit_qctrl.aq_hiwater) cv_wait(&audit_watermark_cv, &audit_mtx); TAILQ_INSERT_TAIL(&audit_q, ar, k_q); audit_q_len++; audit_pre_q_len--; cv_signal(&audit_worker_cv); mtx_unlock(&audit_mtx); } /* * audit_syscall_enter() is called on entry to each system call. It is * responsible for deciding whether or not to audit the call (preselection), * and if so, allocating a per-thread audit record. audit_new() will fill in * basic thread/credential properties. */ void audit_syscall_enter(unsigned short code, struct thread *td) { struct au_mask *aumask; #ifdef KDTRACE_HOOKS void *dtaudit_state; #endif au_class_t class; au_event_t event; au_id_t auid; int record_needed; KASSERT(td->td_ar == NULL, ("audit_syscall_enter: td->td_ar != NULL")); KASSERT((td->td_pflags & TDP_AUDITREC) == 0, ("audit_syscall_enter: TDP_AUDITREC set")); /* * In FreeBSD, each ABI has its own system call table, and hence * mapping of system call codes to audit events. Convert the code to * an audit event identifier using the process system call table * reference. In Darwin, there's only one, so we use the global * symbol for the system call table. No audit record is generated * for bad system calls, as no operation has been performed. */ if (code >= td->td_proc->p_sysent->sv_size) return; event = td->td_proc->p_sysent->sv_table[code].sy_auevent; if (event == AUE_NULL) return; /* * Check which audit mask to use; either the kernel non-attributable * event mask or the process audit mask. */ auid = td->td_ucred->cr_audit.ai_auid; if (auid == AU_DEFAUDITID) aumask = &audit_nae_mask; else aumask = &td->td_ucred->cr_audit.ai_mask; /* * Determine whether trail or pipe preselection would like an audit * record allocated for this system call. */ class = au_event_class(event); if (au_preselect(event, class, aumask, AU_PRS_BOTH)) { /* * If we're out of space and need to suspend unprivileged * processes, do that here rather than trying to allocate * another audit record. 
* * Note: we might wish to be able to continue here in the * future, if the system recovers. That should be possible * by means of checking the condition in a loop around * cv_wait(). It might be desirable to reevaluate whether an * audit record is still required for this event by * re-calling au_preselect(). */ if (audit_in_failure && priv_check(td, PRIV_AUDIT_FAILSTOP) != 0) { cv_wait(&audit_fail_cv, &audit_mtx); panic("audit_failing_stop: thread continued"); } record_needed = 1; } else if (audit_pipe_preselect(auid, event, class, AU_PRS_BOTH, 0)) { record_needed = 1; } else { record_needed = 0; } /* * After audit trails and pipes have made their policy choices, DTrace * may request that records be generated as well. This is a slightly * complex affair, as the DTrace audit provider needs the audit * framework to maintain some state on the audit record, which has not * been allocated at the point where the decision has to be made. * This hook must run even if we are not changing the decision, as * DTrace may want to stick event state onto a record we were going to * produce due to the trail or pipes. The event state returned by the * DTrace provider must be safe without locks held between here and * below -- i.e., dtaudit_state must must refer to stable memory. */ #ifdef KDTRACE_HOOKS dtaudit_state = NULL; if (dtaudit_hook_preselect != NULL) { dtaudit_state = dtaudit_hook_preselect(auid, event, class); if (dtaudit_state != NULL) record_needed = 1; } #endif /* * If a record is required, allocate it and attach it to the thread * for use throughout the system call. Also attach DTrace state if * required. * * XXXRW: If we decide to reference count the evname_elem underlying * dtaudit_state, we will need to free here if no record is allocated * or allocatable. 
*/
	if (record_needed) {
		td->td_ar = audit_new(event, td);
		if (td->td_ar != NULL) {
			/* Flag the thread so the AUDIT_* macros take effect. */
			td->td_pflags |= TDP_AUDITREC;
#ifdef KDTRACE_HOOKS
			td->td_ar->k_dtaudit_state = dtaudit_state;
#endif
		}
	} else
		td->td_ar = NULL;
}

/*
 * audit_syscall_exit() is called from the return of every system call, or in
 * the event of exit1(), during the execution of exit1().  It is responsible
 * for committing the audit record, if any, along with return condition.
 */
void
audit_syscall_exit(int error, struct thread *td)
{
	int retval;

	/*
	 * Commit the audit record as desired; once we pass the record into
	 * audit_commit(), the memory is owned by the audit subsystem.  The
	 * return value from the system call is stored on the user thread.
	 * If there was an error, the return value is set to -1, imitating
	 * the behavior of the cerror routine.
	 */
	if (error)
		retval = -1;
	else
		retval = td->td_retval[0];

	audit_commit(td->td_ar, error, retval);
	/* The thread no longer references a record; clear pointer and flag. */
	td->td_ar = NULL;
	td->td_pflags &= ~TDP_AUDITREC;
}

/*
 * Copy the audit state (cr_audit) of credential 'src' into credential 'dest'.
 */
void
audit_cred_copy(struct ucred *src, struct ucred *dest)
{

	bcopy(&src->cr_audit, &dest->cr_audit, sizeof(dest->cr_audit));
}

/*
 * Tear down the audit state of a credential.  Currently a no-op: the body is
 * empty.
 */
void
audit_cred_destroy(struct ucred *cred)
{

}

/*
 * Initialize the audit state (cr_audit) of a credential to all zeroes.
 */
void
audit_cred_init(struct ucred *cred)
{

	bzero(&cred->cr_audit, sizeof(cred->cr_audit));
}

/*
 * Initialize audit information for the first kernel process (proc 0) and for
 * the first user process (init).
*/ void audit_cred_kproc0(struct ucred *cred) { cred->cr_audit.ai_auid = AU_DEFAUDITID; cred->cr_audit.ai_termid.at_type = AU_IPv4; } void audit_cred_proc1(struct ucred *cred) { cred->cr_audit.ai_auid = AU_DEFAUDITID; cred->cr_audit.ai_termid.at_type = AU_IPv4; } void audit_thread_alloc(struct thread *td) { td->td_ar = NULL; } void audit_thread_free(struct thread *td) { KASSERT(td->td_ar == NULL, ("audit_thread_free: td_ar != NULL")); KASSERT((td->td_pflags & TDP_AUDITREC) == 0, ("audit_thread_free: TDP_AUDITREC set")); } void audit_proc_coredump(struct thread *td, char *path, int errcode) { struct kaudit_record *ar; struct au_mask *aumask; struct ucred *cred; au_class_t class; int ret, sorf; char **pathp; au_id_t auid; ret = 0; /* * Make sure we are using the correct preselection mask. */ cred = td->td_ucred; auid = cred->cr_audit.ai_auid; if (auid == AU_DEFAUDITID) aumask = &audit_nae_mask; else aumask = &cred->cr_audit.ai_mask; /* * It's possible for coredump(9) generation to fail. Make sure that * we handle this case correctly for preselection. */ if (errcode != 0) sorf = AU_PRS_FAILURE; else sorf = AU_PRS_SUCCESS; class = au_event_class(AUE_CORE); if (au_preselect(AUE_CORE, class, aumask, sorf) == 0 && audit_pipe_preselect(auid, AUE_CORE, class, sorf, 0) == 0) return; /* * If we are interested in seeing this audit record, allocate it. * Where possible coredump records should contain a pathname and arg32 * (signal) tokens. 
*/ ar = audit_new(AUE_CORE, td); if (ar == NULL) return; if (path != NULL) { pathp = &ar->k_ar.ar_arg_upath1; *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK); audit_canon_path(td, AT_FDCWD, path, *pathp); ARG_SET_VALID(ar, ARG_UPATH1); } ar->k_ar.ar_arg_signum = td->td_proc->p_sig; ARG_SET_VALID(ar, ARG_SIGNUM); if (errcode != 0) ret = 1; audit_commit(ar, errcode, ret); } Index: head/sys/security/audit/audit.h =================================================================== --- head/sys/security/audit/audit.h (revision 316184) +++ head/sys/security/audit/audit.h (revision 316185) @@ -1,391 +1,404 @@ /*- * Copyright (c) 1999-2005 Apple Inc. + * Copyright (c) 2016-2017 Robert N. M. Watson * All rights reserved. * + * This software was developed by BAE Systems, the University of Cambridge + * Computer Laboratory, and Memorial University under DARPA/AFRL contract + * FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent Computing + * (TC) research program. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * This header includes function prototypes and type definitions that are * necessary for the kernel as a whole to interact with the audit subsystem. */ #ifndef _SECURITY_AUDIT_KERNEL_H_ #define _SECURITY_AUDIT_KERNEL_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif #include #include #include /* * Audit subsystem condition flags. The audit_enabled flag is set and * removed automatically as a result of configuring log files, and can be * observed but should not be directly manipulated. The audit suspension * flag permits audit to be temporarily disabled without reconfiguring the * audit target. */ extern int audit_enabled; extern int audit_suspended; void audit_syscall_enter(unsigned short code, struct thread *td); void audit_syscall_exit(int error, struct thread *td); /* * The remaining kernel functions are conditionally compiled in as they are * wrapped by a macro, and the macro should be the only place in the source * tree where these functions are referenced. 
*/ #ifdef AUDIT struct ipc_perm; struct sockaddr; union auditon_udata; void audit_arg_addr(void * addr); void audit_arg_exit(int status, int retval); void audit_arg_len(int len); void audit_arg_atfd1(int atfd); void audit_arg_atfd2(int atfd); void audit_arg_fd(int fd); void audit_arg_fflags(int fflags); void audit_arg_gid(gid_t gid); void audit_arg_uid(uid_t uid); void audit_arg_egid(gid_t egid); void audit_arg_euid(uid_t euid); void audit_arg_rgid(gid_t rgid); void audit_arg_ruid(uid_t ruid); void audit_arg_sgid(gid_t sgid); void audit_arg_suid(uid_t suid); void audit_arg_groupset(gid_t *gidset, u_int gidset_size); void audit_arg_login(char *login); void audit_arg_ctlname(int *name, int namelen); void audit_arg_mask(int mask); void audit_arg_mode(mode_t mode); void audit_arg_dev(int dev); void audit_arg_value(long value); void audit_arg_owner(uid_t uid, gid_t gid); void audit_arg_pid(pid_t pid); void audit_arg_process(struct proc *p); void audit_arg_signum(u_int signum); void audit_arg_socket(int sodomain, int sotype, int soprotocol); void audit_arg_sockaddr(struct thread *td, int dirfd, struct sockaddr *sa); void audit_arg_auid(uid_t auid); void audit_arg_auditinfo(struct auditinfo *au_info); void audit_arg_auditinfo_addr(struct auditinfo_addr *au_info); void audit_arg_upath1(struct thread *td, int dirfd, char *upath); void audit_arg_upath2(struct thread *td, int dirfd, char *upath); void audit_arg_vnode1(struct vnode *vp); void audit_arg_vnode2(struct vnode *vp); void audit_arg_text(char *text); void audit_arg_cmd(int cmd); void audit_arg_svipc_cmd(int cmd); void audit_arg_svipc_perm(struct ipc_perm *perm); void audit_arg_svipc_id(int id); void audit_arg_svipc_addr(void *addr); +void audit_arg_svipc_which(int which); void audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, mode_t mode); void audit_arg_auditon(union auditon_udata *udata); void audit_arg_file(struct proc *p, struct file *fp); void audit_arg_argv(char *argv, int argc, int length); void 
audit_arg_envv(char *envv, int envc, int length); void audit_arg_rights(cap_rights_t *rightsp); void audit_arg_fcntl_rights(uint32_t fcntlrights); void audit_sysclose(struct thread *td, int fd); void audit_cred_copy(struct ucred *src, struct ucred *dest); void audit_cred_destroy(struct ucred *cred); void audit_cred_init(struct ucred *cred); void audit_cred_kproc0(struct ucred *cred); void audit_cred_proc1(struct ucred *cred); void audit_proc_coredump(struct thread *td, char *path, int errcode); void audit_thread_alloc(struct thread *td); void audit_thread_free(struct thread *td); /* * Define macros to wrap the audit_arg_* calls by checking the global * audit_enabled flag before performing the actual call. */ #define AUDITING_TD(td) ((td)->td_pflags & TDP_AUDITREC) #define AUDIT_ARG_ADDR(addr) do { \ if (AUDITING_TD(curthread)) \ audit_arg_addr((addr)); \ } while (0) #define AUDIT_ARG_ARGV(argv, argc, length) do { \ if (AUDITING_TD(curthread)) \ audit_arg_argv((argv), (argc), (length)); \ } while (0) #define AUDIT_ARG_ATFD1(atfd) do { \ if (AUDITING_TD(curthread)) \ audit_arg_atfd1((atfd)); \ } while (0) #define AUDIT_ARG_ATFD2(atfd) do { \ if (AUDITING_TD(curthread)) \ audit_arg_atfd2((atfd)); \ } while (0) #define AUDIT_ARG_AUDITON(udata) do { \ if (AUDITING_TD(curthread)) \ audit_arg_auditon((udata)); \ } while (0) #define AUDIT_ARG_CMD(cmd) do { \ if (AUDITING_TD(curthread)) \ audit_arg_cmd((cmd)); \ } while (0) #define AUDIT_ARG_DEV(dev) do { \ if (AUDITING_TD(curthread)) \ audit_arg_dev((dev)); \ } while (0) #define AUDIT_ARG_EGID(egid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_egid((egid)); \ } while (0) #define AUDIT_ARG_ENVV(envv, envc, length) do { \ if (AUDITING_TD(curthread)) \ audit_arg_envv((envv), (envc), (length)); \ } while (0) #define AUDIT_ARG_EXIT(status, retval) do { \ if (AUDITING_TD(curthread)) \ audit_arg_exit((status), (retval)); \ } while (0) #define AUDIT_ARG_EUID(euid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_euid((euid)); \ } 
while (0) #define AUDIT_ARG_FD(fd) do { \ if (AUDITING_TD(curthread)) \ audit_arg_fd((fd)); \ } while (0) #define AUDIT_ARG_FILE(p, fp) do { \ if (AUDITING_TD(curthread)) \ audit_arg_file((p), (fp)); \ } while (0) #define AUDIT_ARG_FFLAGS(fflags) do { \ if (AUDITING_TD(curthread)) \ audit_arg_fflags((fflags)); \ } while (0) #define AUDIT_ARG_GID(gid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_gid((gid)); \ } while (0) #define AUDIT_ARG_GROUPSET(gidset, gidset_size) do { \ if (AUDITING_TD(curthread)) \ audit_arg_groupset((gidset), (gidset_size)); \ } while (0) #define AUDIT_ARG_LOGIN(login) do { \ if (AUDITING_TD(curthread)) \ audit_arg_login((login)); \ } while (0) #define AUDIT_ARG_MODE(mode) do { \ if (AUDITING_TD(curthread)) \ audit_arg_mode((mode)); \ } while (0) #define AUDIT_ARG_OWNER(uid, gid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_owner((uid), (gid)); \ } while (0) #define AUDIT_ARG_PID(pid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_pid((pid)); \ } while (0) #define AUDIT_ARG_PROCESS(p) do { \ if (AUDITING_TD(curthread)) \ audit_arg_process((p)); \ } while (0) #define AUDIT_ARG_RGID(rgid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_rgid((rgid)); \ } while (0) #define AUDIT_ARG_RIGHTS(rights) do { \ if (AUDITING_TD(curthread)) \ audit_arg_rights((rights)); \ } while (0) #define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) do { \ if (AUDITING_TD(curthread)) \ audit_arg_fcntl_rights((fcntlrights)); \ } while (0) #define AUDIT_ARG_RUID(ruid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_ruid((ruid)); \ } while (0) #define AUDIT_ARG_SIGNUM(signum) do { \ if (AUDITING_TD(curthread)) \ audit_arg_signum((signum)); \ } while (0) #define AUDIT_ARG_SGID(sgid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_sgid((sgid)); \ } while (0) #define AUDIT_ARG_SOCKET(sodomain, sotype, soprotocol) do { \ if (AUDITING_TD(curthread)) \ audit_arg_socket((sodomain), (sotype), (soprotocol)); \ } while (0) #define AUDIT_ARG_SOCKADDR(td, dirfd, sa) do { \ if 
(AUDITING_TD(curthread)) \ audit_arg_sockaddr((td), (dirfd), (sa)); \ } while (0) #define AUDIT_ARG_SUID(suid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_suid((suid)); \ } while (0) +#define AUDIT_ARG_SVIPC_WHICH(which) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_svipc_which((which)); \ +} while (0) + #define AUDIT_ARG_TEXT(text) do { \ if (AUDITING_TD(curthread)) \ audit_arg_text((text)); \ } while (0) #define AUDIT_ARG_UID(uid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_uid((uid)); \ } while (0) #define AUDIT_ARG_UPATH1(td, dirfd, upath) do { \ if (AUDITING_TD(curthread)) \ audit_arg_upath1((td), (dirfd), (upath)); \ } while (0) #define AUDIT_ARG_UPATH2(td, dirfd, upath) do { \ if (AUDITING_TD(curthread)) \ audit_arg_upath2((td), (dirfd), (upath)); \ } while (0) #define AUDIT_ARG_VALUE(value) do { \ if (AUDITING_TD(curthread)) \ audit_arg_value((value)); \ } while (0) #define AUDIT_ARG_VNODE1(vp) do { \ if (AUDITING_TD(curthread)) \ audit_arg_vnode1((vp)); \ } while (0) #define AUDIT_ARG_VNODE2(vp) do { \ if (AUDITING_TD(curthread)) \ audit_arg_vnode2((vp)); \ } while (0) #define AUDIT_SYSCALL_ENTER(code, td) do { \ if (audit_enabled) { \ audit_syscall_enter(code, td); \ } \ } while (0) /* * Wrap the audit_syscall_exit() function so that it is called only when * we have a audit record on the thread. Audit records can persist after * auditing is disabled, so we don't just check audit_enabled here. */ #define AUDIT_SYSCALL_EXIT(error, td) do { \ if (td->td_pflags & TDP_AUDITREC) \ audit_syscall_exit(error, td); \ } while (0) /* * A Macro to wrap the audit_sysclose() function. 
*/ #define AUDIT_SYSCLOSE(td, fd) do { \ if (td->td_pflags & TDP_AUDITREC) \ audit_sysclose(td, fd); \ } while (0) #else /* !AUDIT */ #define AUDIT_ARG_ADDR(addr) #define AUDIT_ARG_ARGV(argv, argc, length) #define AUDIT_ARG_ATFD1(atfd) #define AUDIT_ARG_ATFD2(atfd) #define AUDIT_ARG_AUDITON(udata) #define AUDIT_ARG_CMD(cmd) #define AUDIT_ARG_DEV(dev) #define AUDIT_ARG_EGID(egid) #define AUDIT_ARG_ENVV(envv, envc, length) #define AUDIT_ARG_EXIT(status, retval) #define AUDIT_ARG_EUID(euid) #define AUDIT_ARG_FD(fd) #define AUDIT_ARG_FILE(p, fp) #define AUDIT_ARG_FFLAGS(fflags) #define AUDIT_ARG_GID(gid) #define AUDIT_ARG_GROUPSET(gidset, gidset_size) #define AUDIT_ARG_LOGIN(login) #define AUDIT_ARG_MODE(mode) #define AUDIT_ARG_OWNER(uid, gid) #define AUDIT_ARG_PID(pid) #define AUDIT_ARG_PROCESS(p) #define AUDIT_ARG_RGID(rgid) #define AUDIT_ARG_RIGHTS(rights) #define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) #define AUDIT_ARG_RUID(ruid) #define AUDIT_ARG_SIGNUM(signum) #define AUDIT_ARG_SGID(sgid) #define AUDIT_ARG_SOCKET(sodomain, sotype, soprotocol) #define AUDIT_ARG_SOCKADDR(td, dirfd, sa) #define AUDIT_ARG_SUID(suid) +#define AUDIT_ARG_SVIPC_WHICH(which) #define AUDIT_ARG_TEXT(text) #define AUDIT_ARG_UID(uid) #define AUDIT_ARG_UPATH1(td, dirfd, upath) #define AUDIT_ARG_UPATH2(td, dirfd, upath) #define AUDIT_ARG_VALUE(value) #define AUDIT_ARG_VNODE1(vp) #define AUDIT_ARG_VNODE2(vp) #define AUDIT_SYSCALL_ENTER(code, td) #define AUDIT_SYSCALL_EXIT(error, td) #define AUDIT_SYSCLOSE(p, fd) #endif /* AUDIT */ #endif /* !_SECURITY_AUDIT_KERNEL_H_ */ Index: head/sys/security/audit/audit_arg.c =================================================================== --- head/sys/security/audit/audit_arg.c (revision 316184) +++ head/sys/security/audit/audit_arg.c (revision 316185) @@ -1,920 +1,939 @@ /*- * Copyright (c) 1999-2005 Apple Inc. + * Copyright (c) 2016-2017 Robert N. M. Watson * All rights reserved. 
* + * Portions of this software were developed by BAE Systems, the University of + * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL + * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent + * Computing (TC) research program. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Calls to manipulate elements of the audit record structure from system * call code. Macro wrappers will prevent this functions from being entered * if auditing is disabled, avoiding the function call cost. We check the * thread audit record pointer anyway, as the audit condition could change, * and pre-selection may not have allocated an audit record for this event. * * XXXAUDIT: Should we assert, in each case, that this field of the record * hasn't already been filled in? */ void audit_arg_addr(void *addr) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_addr = addr; ARG_SET_VALID(ar, ARG_ADDR); } void audit_arg_exit(int status, int retval) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_exitstatus = status; ar->k_ar.ar_arg_exitretval = retval; ARG_SET_VALID(ar, ARG_EXIT); } void audit_arg_len(int len) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_len = len; ARG_SET_VALID(ar, ARG_LEN); } void audit_arg_atfd1(int atfd) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_atfd1 = atfd; ARG_SET_VALID(ar, ARG_ATFD1); } void audit_arg_atfd2(int atfd) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_atfd2 = atfd; ARG_SET_VALID(ar, ARG_ATFD2); } void audit_arg_fd(int fd) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_fd = fd; ARG_SET_VALID(ar, ARG_FD); } void audit_arg_fflags(int fflags) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_fflags = fflags; ARG_SET_VALID(ar, ARG_FFLAGS); } void audit_arg_gid(gid_t gid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_gid = gid; 
ARG_SET_VALID(ar, ARG_GID); } void audit_arg_uid(uid_t uid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_uid = uid; ARG_SET_VALID(ar, ARG_UID); } void audit_arg_egid(gid_t egid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_egid = egid; ARG_SET_VALID(ar, ARG_EGID); } void audit_arg_euid(uid_t euid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_euid = euid; ARG_SET_VALID(ar, ARG_EUID); } void audit_arg_rgid(gid_t rgid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_rgid = rgid; ARG_SET_VALID(ar, ARG_RGID); } void audit_arg_ruid(uid_t ruid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_ruid = ruid; ARG_SET_VALID(ar, ARG_RUID); } void audit_arg_sgid(gid_t sgid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_sgid = sgid; ARG_SET_VALID(ar, ARG_SGID); } void audit_arg_suid(uid_t suid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_suid = suid; ARG_SET_VALID(ar, ARG_SUID); } void audit_arg_groupset(gid_t *gidset, u_int gidset_size) { u_int i; struct kaudit_record *ar; KASSERT(gidset_size <= ngroups_max + 1, ("audit_arg_groupset: gidset_size > (kern.ngroups + 1)")); ar = currecord(); if (ar == NULL) return; if (ar->k_ar.ar_arg_groups.gidset == NULL) ar->k_ar.ar_arg_groups.gidset = malloc( sizeof(gid_t) * gidset_size, M_AUDITGIDSET, M_WAITOK); for (i = 0; i < gidset_size; i++) ar->k_ar.ar_arg_groups.gidset[i] = gidset[i]; ar->k_ar.ar_arg_groups.gidset_size = gidset_size; ARG_SET_VALID(ar, ARG_GROUPSET); } void audit_arg_login(char *login) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; strlcpy(ar->k_ar.ar_arg_login, login, MAXLOGNAME); ARG_SET_VALID(ar, ARG_LOGIN); } void audit_arg_ctlname(int *name, int namelen) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) 
return; bcopy(name, &ar->k_ar.ar_arg_ctlname, namelen * sizeof(int)); ar->k_ar.ar_arg_len = namelen; ARG_SET_VALID(ar, ARG_CTLNAME | ARG_LEN); } void audit_arg_mask(int mask) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_mask = mask; ARG_SET_VALID(ar, ARG_MASK); } void audit_arg_mode(mode_t mode) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_mode = mode; ARG_SET_VALID(ar, ARG_MODE); } void audit_arg_dev(int dev) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_dev = dev; ARG_SET_VALID(ar, ARG_DEV); } void audit_arg_value(long value) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_value = value; ARG_SET_VALID(ar, ARG_VALUE); } void audit_arg_owner(uid_t uid, gid_t gid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_uid = uid; ar->k_ar.ar_arg_gid = gid; ARG_SET_VALID(ar, ARG_UID | ARG_GID); } void audit_arg_pid(pid_t pid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_pid = pid; ARG_SET_VALID(ar, ARG_PID); } void audit_arg_process(struct proc *p) { struct kaudit_record *ar; struct ucred *cred; KASSERT(p != NULL, ("audit_arg_process: p == NULL")); PROC_LOCK_ASSERT(p, MA_OWNED); ar = currecord(); if (ar == NULL) return; cred = p->p_ucred; ar->k_ar.ar_arg_auid = cred->cr_audit.ai_auid; ar->k_ar.ar_arg_euid = cred->cr_uid; ar->k_ar.ar_arg_egid = cred->cr_groups[0]; ar->k_ar.ar_arg_ruid = cred->cr_ruid; ar->k_ar.ar_arg_rgid = cred->cr_rgid; ar->k_ar.ar_arg_asid = cred->cr_audit.ai_asid; ar->k_ar.ar_arg_termid_addr = cred->cr_audit.ai_termid; ar->k_ar.ar_arg_pid = p->p_pid; ARG_SET_VALID(ar, ARG_AUID | ARG_EUID | ARG_EGID | ARG_RUID | ARG_RGID | ARG_ASID | ARG_TERMID_ADDR | ARG_PID | ARG_PROCESS); } void audit_arg_signum(u_int signum) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_signum = signum; 
ARG_SET_VALID(ar, ARG_SIGNUM); } void audit_arg_socket(int sodomain, int sotype, int soprotocol) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_sockinfo.so_domain = sodomain; ar->k_ar.ar_arg_sockinfo.so_type = sotype; ar->k_ar.ar_arg_sockinfo.so_protocol = soprotocol; ARG_SET_VALID(ar, ARG_SOCKINFO); } void audit_arg_sockaddr(struct thread *td, int dirfd, struct sockaddr *sa) { struct kaudit_record *ar; KASSERT(td != NULL, ("audit_arg_sockaddr: td == NULL")); KASSERT(sa != NULL, ("audit_arg_sockaddr: sa == NULL")); ar = currecord(); if (ar == NULL) return; bcopy(sa, &ar->k_ar.ar_arg_sockaddr, sa->sa_len); switch (sa->sa_family) { case AF_INET: ARG_SET_VALID(ar, ARG_SADDRINET); break; case AF_INET6: ARG_SET_VALID(ar, ARG_SADDRINET6); break; case AF_UNIX: if (dirfd != AT_FDCWD) audit_arg_atfd1(dirfd); audit_arg_upath1(td, dirfd, ((struct sockaddr_un *)sa)->sun_path); ARG_SET_VALID(ar, ARG_SADDRUNIX); break; /* XXXAUDIT: default:? */ } } void audit_arg_auid(uid_t auid) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_auid = auid; ARG_SET_VALID(ar, ARG_AUID); } void audit_arg_auditinfo(struct auditinfo *au_info) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_auid = au_info->ai_auid; ar->k_ar.ar_arg_asid = au_info->ai_asid; ar->k_ar.ar_arg_amask.am_success = au_info->ai_mask.am_success; ar->k_ar.ar_arg_amask.am_failure = au_info->ai_mask.am_failure; ar->k_ar.ar_arg_termid.port = au_info->ai_termid.port; ar->k_ar.ar_arg_termid.machine = au_info->ai_termid.machine; ARG_SET_VALID(ar, ARG_AUID | ARG_ASID | ARG_AMASK | ARG_TERMID); } void audit_arg_auditinfo_addr(struct auditinfo_addr *au_info) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_auid = au_info->ai_auid; ar->k_ar.ar_arg_asid = au_info->ai_asid; ar->k_ar.ar_arg_amask.am_success = au_info->ai_mask.am_success; ar->k_ar.ar_arg_amask.am_failure = 
au_info->ai_mask.am_failure; ar->k_ar.ar_arg_termid_addr.at_type = au_info->ai_termid.at_type; ar->k_ar.ar_arg_termid_addr.at_port = au_info->ai_termid.at_port; ar->k_ar.ar_arg_termid_addr.at_addr[0] = au_info->ai_termid.at_addr[0]; ar->k_ar.ar_arg_termid_addr.at_addr[1] = au_info->ai_termid.at_addr[1]; ar->k_ar.ar_arg_termid_addr.at_addr[2] = au_info->ai_termid.at_addr[2]; ar->k_ar.ar_arg_termid_addr.at_addr[3] = au_info->ai_termid.at_addr[3]; ARG_SET_VALID(ar, ARG_AUID | ARG_ASID | ARG_AMASK | ARG_TERMID_ADDR); } void audit_arg_text(char *text) { struct kaudit_record *ar; KASSERT(text != NULL, ("audit_arg_text: text == NULL")); ar = currecord(); if (ar == NULL) return; /* Invalidate the text string */ ar->k_ar.ar_valid_arg &= (ARG_ALL ^ ARG_TEXT); if (ar->k_ar.ar_arg_text == NULL) ar->k_ar.ar_arg_text = malloc(MAXPATHLEN, M_AUDITTEXT, M_WAITOK); strncpy(ar->k_ar.ar_arg_text, text, MAXPATHLEN); ARG_SET_VALID(ar, ARG_TEXT); } void audit_arg_cmd(int cmd) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_cmd = cmd; ARG_SET_VALID(ar, ARG_CMD); } void audit_arg_svipc_cmd(int cmd) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_svipc_cmd = cmd; ARG_SET_VALID(ar, ARG_SVIPC_CMD); } void audit_arg_svipc_perm(struct ipc_perm *perm) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; bcopy(perm, &ar->k_ar.ar_arg_svipc_perm, sizeof(ar->k_ar.ar_arg_svipc_perm)); ARG_SET_VALID(ar, ARG_SVIPC_PERM); } void audit_arg_svipc_id(int id) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_svipc_id = id; ARG_SET_VALID(ar, ARG_SVIPC_ID); } void audit_arg_svipc_addr(void * addr) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_svipc_addr = addr; ARG_SET_VALID(ar, ARG_SVIPC_ADDR); +} + +void +audit_arg_svipc_which(int which) +{ + struct kaudit_record *ar; + + ar = currecord(); + if (ar == NULL) + return; + + 
ar->k_ar.ar_arg_svipc_which = which; + ARG_SET_VALID(ar, ARG_SVIPC_WHICH); } void audit_arg_posix_ipc_perm(uid_t uid, gid_t gid, mode_t mode) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_pipc_perm.pipc_uid = uid; ar->k_ar.ar_arg_pipc_perm.pipc_gid = gid; ar->k_ar.ar_arg_pipc_perm.pipc_mode = mode; ARG_SET_VALID(ar, ARG_POSIX_IPC_PERM); } void audit_arg_auditon(union auditon_udata *udata) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; bcopy((void *)udata, &ar->k_ar.ar_arg_auditon, sizeof(ar->k_ar.ar_arg_auditon)); ARG_SET_VALID(ar, ARG_AUDITON); } /* * Audit information about a file, either the file's vnode info, or its * socket address info. */ void audit_arg_file(struct proc *p, struct file *fp) { struct kaudit_record *ar; struct socket *so; struct inpcb *pcb; struct vnode *vp; ar = currecord(); if (ar == NULL) return; switch (fp->f_type) { case DTYPE_VNODE: case DTYPE_FIFO: /* * XXXAUDIT: Only possibly to record as first vnode? */ vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); audit_arg_vnode1(vp); VOP_UNLOCK(vp, 0); break; case DTYPE_SOCKET: so = (struct socket *)fp->f_data; if (INP_CHECK_SOCKAF(so, PF_INET)) { SOCK_LOCK(so); ar->k_ar.ar_arg_sockinfo.so_type = so->so_type; ar->k_ar.ar_arg_sockinfo.so_domain = INP_SOCKAF(so); ar->k_ar.ar_arg_sockinfo.so_protocol = so->so_proto->pr_protocol; SOCK_UNLOCK(so); pcb = (struct inpcb *)so->so_pcb; INP_RLOCK(pcb); ar->k_ar.ar_arg_sockinfo.so_raddr = pcb->inp_faddr.s_addr; ar->k_ar.ar_arg_sockinfo.so_laddr = pcb->inp_laddr.s_addr; ar->k_ar.ar_arg_sockinfo.so_rport = pcb->inp_fport; ar->k_ar.ar_arg_sockinfo.so_lport = pcb->inp_lport; INP_RUNLOCK(pcb); ARG_SET_VALID(ar, ARG_SOCKINFO); } break; default: /* XXXAUDIT: else? */ break; } } /* * Store a path as given by the user process for auditing into the audit * record stored on the user thread. This function will allocate the memory * to store the path info if not already available. 
This memory will be * freed when the audit record is freed. The path is canonlicalised with * respect to the thread and directory descriptor passed. */ static void audit_arg_upath(struct thread *td, int dirfd, char *upath, char **pathp) { if (*pathp == NULL) *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK); audit_canon_path(td, dirfd, upath, *pathp); } void audit_arg_upath1(struct thread *td, int dirfd, char *upath) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; audit_arg_upath(td, dirfd, upath, &ar->k_ar.ar_arg_upath1); ARG_SET_VALID(ar, ARG_UPATH1); } void audit_arg_upath2(struct thread *td, int dirfd, char *upath) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; audit_arg_upath(td, dirfd, upath, &ar->k_ar.ar_arg_upath2); ARG_SET_VALID(ar, ARG_UPATH2); } /* * Function to save the path and vnode attr information into the audit * record. * * It is assumed that the caller will hold any vnode locks necessary to * perform a VOP_GETATTR() on the passed vnode. * * XXX: The attr code is very similar to vfs_vnops.c:vn_stat(), but always * provides access to the generation number as we need that to construct the * BSM file ID. * * XXX: We should accept the process argument from the caller, since it's * very likely they already have a reference. * * XXX: Error handling in this function is poor. * * XXXAUDIT: Possibly KASSERT the path pointer is NULL? */ static int audit_arg_vnode(struct vnode *vp, struct vnode_au_info *vnp) { struct vattr vattr; int error; ASSERT_VOP_LOCKED(vp, "audit_arg_vnode"); error = VOP_GETATTR(vp, &vattr, curthread->td_ucred); if (error) { /* XXX: How to handle this case? 
*/ return (error); } vnp->vn_mode = vattr.va_mode; vnp->vn_uid = vattr.va_uid; vnp->vn_gid = vattr.va_gid; vnp->vn_dev = vattr.va_rdev; vnp->vn_fsid = vattr.va_fsid; vnp->vn_fileid = vattr.va_fileid; vnp->vn_gen = vattr.va_gen; return (0); } void audit_arg_vnode1(struct vnode *vp) { struct kaudit_record *ar; int error; ar = currecord(); if (ar == NULL) return; ARG_CLEAR_VALID(ar, ARG_VNODE1); error = audit_arg_vnode(vp, &ar->k_ar.ar_arg_vnode1); if (error == 0) ARG_SET_VALID(ar, ARG_VNODE1); } void audit_arg_vnode2(struct vnode *vp) { struct kaudit_record *ar; int error; ar = currecord(); if (ar == NULL) return; ARG_CLEAR_VALID(ar, ARG_VNODE2); error = audit_arg_vnode(vp, &ar->k_ar.ar_arg_vnode2); if (error == 0) ARG_SET_VALID(ar, ARG_VNODE2); } /* * Audit the argument strings passed to exec. */ void audit_arg_argv(char *argv, int argc, int length) { struct kaudit_record *ar; if (audit_argv == 0) return; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_argv = malloc(length, M_AUDITTEXT, M_WAITOK); bcopy(argv, ar->k_ar.ar_arg_argv, length); ar->k_ar.ar_arg_argc = argc; ARG_SET_VALID(ar, ARG_ARGV); } /* * Audit the environment strings passed to exec. 
*/ void audit_arg_envv(char *envv, int envc, int length) { struct kaudit_record *ar; if (audit_arge == 0) return; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_envv = malloc(length, M_AUDITTEXT, M_WAITOK); bcopy(envv, ar->k_ar.ar_arg_envv, length); ar->k_ar.ar_arg_envc = envc; ARG_SET_VALID(ar, ARG_ENVV); } void audit_arg_rights(cap_rights_t *rightsp) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_rights = *rightsp; ARG_SET_VALID(ar, ARG_RIGHTS); } void audit_arg_fcntl_rights(uint32_t fcntlrights) { struct kaudit_record *ar; ar = currecord(); if (ar == NULL) return; ar->k_ar.ar_arg_fcntl_rights = fcntlrights; ARG_SET_VALID(ar, ARG_FCNTL_RIGHTS); } /* * The close() system call uses it's own audit call to capture the path/vnode * information because those pieces are not easily obtained within the system * call itself. */ void audit_sysclose(struct thread *td, int fd) { cap_rights_t rights; struct kaudit_record *ar; struct vnode *vp; struct file *fp; KASSERT(td != NULL, ("audit_sysclose: td == NULL")); ar = currecord(); if (ar == NULL) return; audit_arg_fd(fd); if (getvnode(td, fd, cap_rights_init(&rights), &fp) != 0) return; vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); audit_arg_vnode1(vp); VOP_UNLOCK(vp, 0); fdrop(fp, td); } Index: head/sys/security/audit/audit_bsm_klib.c =================================================================== --- head/sys/security/audit/audit_bsm_klib.c (revision 316184) +++ head/sys/security/audit/audit_bsm_klib.c (revision 316185) @@ -1,741 +1,801 @@ /* * Copyright (c) 1999-2009 Apple Inc. - * Copyright (c) 2005, 2016 Robert N. M. Watson + * Copyright (c) 2005, 2016-2017 Robert N. M. Watson * All rights reserved. * * Portions of this software were developed by BAE Systems, the University of * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent * Computing (TC) research program. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Apple Inc. ("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Hash table functions for the audit event number to event class mask * mapping. 
*/ #define EVCLASSMAP_HASH_TABLE_SIZE 251 struct evclass_elem { au_event_t event; au_class_t class; LIST_ENTRY(evclass_elem) entry; }; struct evclass_list { LIST_HEAD(, evclass_elem) head; }; static MALLOC_DEFINE(M_AUDITEVCLASS, "audit_evclass", "Audit event class"); static struct rwlock evclass_lock; static struct evclass_list evclass_hash[EVCLASSMAP_HASH_TABLE_SIZE]; #define EVCLASS_LOCK_INIT() rw_init(&evclass_lock, "evclass_lock") #define EVCLASS_RLOCK() rw_rlock(&evclass_lock) #define EVCLASS_RUNLOCK() rw_runlock(&evclass_lock) #define EVCLASS_WLOCK() rw_wlock(&evclass_lock) #define EVCLASS_WUNLOCK() rw_wunlock(&evclass_lock) /* * Hash table maintaining a mapping from audit event numbers to audit event * names. For now, used only by DTrace, but present always so that userspace * tools can register and inspect fields consistently even if DTrace is not * present. * * struct evname_elem is defined in audit_private.h so that audit_dtrace.c can * use the definition. */ #define EVNAMEMAP_HASH_TABLE_SIZE 251 struct evname_list { LIST_HEAD(, evname_elem) enl_head; }; static MALLOC_DEFINE(M_AUDITEVNAME, "audit_evname", "Audit event name"); static struct sx evnamemap_lock; static struct evname_list evnamemap_hash[EVNAMEMAP_HASH_TABLE_SIZE]; #define EVNAMEMAP_LOCK_INIT() sx_init(&evnamemap_lock, "evnamemap_lock"); #define EVNAMEMAP_RLOCK() sx_slock(&evnamemap_lock) #define EVNAMEMAP_RUNLOCK() sx_sunlock(&evnamemap_lock) #define EVNAMEMAP_WLOCK() sx_xlock(&evnamemap_lock) #define EVNAMEMAP_WUNLOCK() sx_xunlock(&evnamemap_lock) struct aue_open_event { int aoe_flags; au_event_t aoe_event; }; static const struct aue_open_event aue_open[] = { { O_RDONLY, AUE_OPEN_R }, { (O_RDONLY | O_CREAT), AUE_OPEN_RC }, { (O_RDONLY | O_CREAT | O_TRUNC), AUE_OPEN_RTC }, { (O_RDONLY | O_TRUNC), AUE_OPEN_RT }, { O_RDWR, AUE_OPEN_RW }, { (O_RDWR | O_CREAT), AUE_OPEN_RWC }, { (O_RDWR | O_CREAT | O_TRUNC), AUE_OPEN_RWTC }, { (O_RDWR | O_TRUNC), AUE_OPEN_RWT }, { O_WRONLY, AUE_OPEN_W }, { 
(O_WRONLY | O_CREAT), AUE_OPEN_WC }, { (O_WRONLY | O_CREAT | O_TRUNC), AUE_OPEN_WTC }, { (O_WRONLY | O_TRUNC), AUE_OPEN_WT }, }; static const struct aue_open_event aue_openat[] = { { O_RDONLY, AUE_OPENAT_R }, { (O_RDONLY | O_CREAT), AUE_OPENAT_RC }, { (O_RDONLY | O_CREAT | O_TRUNC), AUE_OPENAT_RTC }, { (O_RDONLY | O_TRUNC), AUE_OPENAT_RT }, { O_RDWR, AUE_OPENAT_RW }, { (O_RDWR | O_CREAT), AUE_OPENAT_RWC }, { (O_RDWR | O_CREAT | O_TRUNC), AUE_OPENAT_RWTC }, { (O_RDWR | O_TRUNC), AUE_OPENAT_RWT }, { O_WRONLY, AUE_OPENAT_W }, { (O_WRONLY | O_CREAT), AUE_OPENAT_WC }, { (O_WRONLY | O_CREAT | O_TRUNC), AUE_OPENAT_WTC }, { (O_WRONLY | O_TRUNC), AUE_OPENAT_WT }, }; +static const int aue_msgsys[] = { + /* 0 */ AUE_MSGCTL, + /* 1 */ AUE_MSGGET, + /* 2 */ AUE_MSGSND, + /* 3 */ AUE_MSGRCV, +}; +static const int aue_msgsys_count = sizeof(aue_msgsys) / sizeof(int); + +static const int aue_semsys[] = { + /* 0 */ AUE_SEMCTL, + /* 1 */ AUE_SEMGET, + /* 2 */ AUE_SEMOP, +}; +static const int aue_semsys_count = sizeof(aue_semsys) / sizeof(int); + +static const int aue_shmsys[] = { + /* 0 */ AUE_SHMAT, + /* 1 */ AUE_SHMDT, + /* 2 */ AUE_SHMGET, + /* 3 */ AUE_SHMCTL, +}; +static const int aue_shmsys_count = sizeof(aue_shmsys) / sizeof(int); + /* * Look up the class for an audit event in the class mapping table. */ au_class_t au_event_class(au_event_t event) { struct evclass_list *evcl; struct evclass_elem *evc; au_class_t class; EVCLASS_RLOCK(); evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE]; class = 0; LIST_FOREACH(evc, &evcl->head, entry) { if (evc->event == event) { class = evc->class; goto out; } } out: EVCLASS_RUNLOCK(); return (class); } /* * Insert a event to class mapping. If the event already exists in the * mapping, then replace the mapping with the new one. * * XXX There is currently no constraints placed on the number of mappings. * May want to either limit to a number, or in terms of memory usage. 
*/ void au_evclassmap_insert(au_event_t event, au_class_t class) { struct evclass_list *evcl; struct evclass_elem *evc, *evc_new; /* * Pessimistically, always allocate storage before acquiring mutex. * Free if there is already a mapping for this event. */ evc_new = malloc(sizeof(*evc), M_AUDITEVCLASS, M_WAITOK); EVCLASS_WLOCK(); evcl = &evclass_hash[event % EVCLASSMAP_HASH_TABLE_SIZE]; LIST_FOREACH(evc, &evcl->head, entry) { if (evc->event == event) { evc->class = class; EVCLASS_WUNLOCK(); free(evc_new, M_AUDITEVCLASS); return; } } evc = evc_new; evc->event = event; evc->class = class; LIST_INSERT_HEAD(&evcl->head, evc, entry); EVCLASS_WUNLOCK(); } void au_evclassmap_init(void) { int i; EVCLASS_LOCK_INIT(); for (i = 0; i < EVCLASSMAP_HASH_TABLE_SIZE; i++) LIST_INIT(&evclass_hash[i].head); /* * Set up the initial event to class mapping for system calls. * * XXXRW: Really, this should walk all possible audit events, not all * native ABI system calls, as there may be audit events reachable * only through non-native system calls. It also seems a shame to * frob the mutex this early. */ for (i = 0; i < SYS_MAXSYSCALL; i++) { if (sysent[i].sy_auevent != AUE_NULL) au_evclassmap_insert(sysent[i].sy_auevent, 0); } } /* * Check whether an event is aditable by comparing the mask of classes this * event is part of against the given mask. */ int au_preselect(au_event_t event, au_class_t class, au_mask_t *mask_p, int sorf) { au_class_t effmask = 0; if (mask_p == NULL) return (-1); /* * Perform the actual check of the masks against the event. */ if (sorf & AU_PRS_SUCCESS) effmask |= (mask_p->am_success & class); if (sorf & AU_PRS_FAILURE) effmask |= (mask_p->am_failure & class); if (effmask) return (1); else return (0); } /* * Look up the name for an audit event in the event-to-name mapping table. 
*/
/*
 * On a match the name is copied into 'name' (at most EVNAMEMAP_NAME_SIZE
 * bytes, NUL-terminated by strlcpy()) and 0 is returned; ENOENT is returned
 * when the event has no entry.  The table read lock is held for the whole
 * lookup.
 */
int
au_event_name(au_event_t event, char *name)
{
	struct evname_list *enl;
	struct evname_elem *ene;
	int error;

	error = ENOENT;
	EVNAMEMAP_RLOCK();
	enl = &evnamemap_hash[event % EVNAMEMAP_HASH_TABLE_SIZE];
	LIST_FOREACH(ene, &enl->enl_head, ene_entry) {
		if (ene->ene_event == event) {
			strlcpy(name, ene->ene_name, EVNAMEMAP_NAME_SIZE);
			error = 0;
			goto out;
		}
	}
out:
	EVNAMEMAP_RUNLOCK();
	return (error);
}

/*
 * Insert a event-to-name mapping.  If the event already exists in the
 * mapping, then replace the mapping with the new one.
 *
 * XXX There is currently no constraints placed on the number of mappings.
 * May want to either limit to a number, or in terms of memory usage.
 *
 * XXXRW: Accepts truncated name -- but perhaps should return failure instead?
 *
 * XXXRW: It could be we need a way to remove existing names...?
 *
 * XXXRW: We handle collisions between numbers, but I wonder if we also need a
 * way to handle name collisions, for DTrace, where probe names must be
 * unique?
 */
void
au_evnamemap_insert(au_event_t event, const char *name)
{
	struct evname_list *enl;
	struct evname_elem *ene, *ene_new;

	/*
	 * Pessimistically, always allocate storage before acquiring lock.
	 * Free if there is already a mapping for this event.
	 */
	ene_new = malloc(sizeof(*ene_new), M_AUDITEVNAME, M_WAITOK | M_ZERO);
	EVNAMEMAP_WLOCK();
	enl = &evnamemap_hash[event % EVNAMEMAP_HASH_TABLE_SIZE];
	LIST_FOREACH(ene, &enl->enl_head, ene_entry) {
		if (ene->ene_event == event) {
			/*
			 * Existing entry: rewrite its name under the
			 * element's own lock, taken while the table write
			 * lock is still held.
			 */
			EVNAME_LOCK(ene);
			(void)strlcpy(ene->ene_name, name,
			    sizeof(ene->ene_name));
			EVNAME_UNLOCK(ene);
			EVNAMEMAP_WUNLOCK();
			/* The preallocated element was not needed. */
			free(ene_new, M_AUDITEVNAME);
			return;
		}
	}
	/* No existing entry: initialise the new element and link it in. */
	ene = ene_new;
	mtx_init(&ene->ene_lock, "au_evnamemap", NULL, MTX_DEF);
	ene->ene_event = event;
	(void)strlcpy(ene->ene_name, name, sizeof(ene->ene_name));
	LIST_INSERT_HEAD(&enl->enl_head, ene, ene_entry);
	EVNAMEMAP_WUNLOCK();
}

/*
 * Initialise the event-to-name table: set up its lock and leave every hash
 * bucket empty.
 */
void
au_evnamemap_init(void)
{
	int i;

	EVNAMEMAP_LOCK_INIT();
	for (i = 0; i < EVNAMEMAP_HASH_TABLE_SIZE; i++)
		LIST_INIT(&evnamemap_hash[i].enl_head);

	/*
	 * XXXRW: Unlike the event-to-class mapping, we don't attempt to
	 * pre-populate the list.  Perhaps we should...?  But not sure we
	 * really want to duplicate /etc/security/audit_event in the kernel
	 * -- and we'd need a way to remove names?
	 */
}

/*
 * The DTrace audit provider occasionally needs to walk the entries in the
 * event-to-name mapping table, and uses this public interface to do so.  A
 * write lock is acquired so that the provider can safely update its fields in
 * table entries.  The callback is invoked once per element, with the table
 * write lock held.
 */
void
au_evnamemap_foreach(au_evnamemap_callback_t callback)
{
	struct evname_list *enl;
	struct evname_elem *ene;
	int i;

	EVNAMEMAP_WLOCK();
	for (i = 0; i < EVNAMEMAP_HASH_TABLE_SIZE; i++) {
		enl = &evnamemap_hash[i];
		LIST_FOREACH(ene, &enl->enl_head, ene_entry)
			callback(ene);
	}
	EVNAMEMAP_WUNLOCK();
}

#ifdef KDTRACE_HOOKS
/*
 * Look up an event-to-name mapping table entry by event number.  As evname
 * elements are stable in memory, we can return the pointer without the table
 * lock held -- but the caller will need to lock the element mutex before
 * accessing element fields.
 *
 * NB: the event identifier in elements is stable and can be read without
 * holding the evname_elem lock.
*/
struct evname_elem *
au_evnamemap_lookup(au_event_t event)
{
	struct evname_list *enl;
	struct evname_elem *ene;

	EVNAMEMAP_RLOCK();
	/* Pick the hash bucket for this event number and scan it. */
	enl = &evnamemap_hash[event % EVNAMEMAP_HASH_TABLE_SIZE];
	LIST_FOREACH(ene, &enl->enl_head, ene_entry) {
		if (ene->ene_event == event)
			goto out;
	}
	/* Nothing in the bucket matched; NULL reports "not found". */
	ene = NULL;
out:
	/* The entry pointer is returned after the read lock is released. */
	EVNAMEMAP_RUNLOCK();
	return (ene);
}
#endif /* !KDTRACE_HOOKS */

/*
 * Convert sysctl names and present arguments to events.
 *
 * Only name[0] is examined.  When valid_arg does not carry both ARG_CTLNAME
 * and ARG_LEN, AUE_SYSCTL is returned.  Names in the first case group always
 * yield AUE_SYSCTL_NONADMIN.  Names in the second group yield AUE_SYSCTL only
 * when ARG_VALUE is set in valid_arg and AUE_SYSCTL_NONADMIN otherwise.  Any
 * other name yields AUE_SYSCTL.
 */
au_event_t
audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg)
{

	/* can't parse it - so return the worst case */
	if ((valid_arg & (ARG_CTLNAME | ARG_LEN)) != (ARG_CTLNAME | ARG_LEN))
		return (AUE_SYSCTL);

	switch (name[0]) {
	/* non-admin "lookups" treat them special */
	case KERN_OSTYPE:
	case KERN_OSRELEASE:
	case KERN_OSREV:
	case KERN_VERSION:
	case KERN_ARGMAX:
	case KERN_CLOCKRATE:
	case KERN_BOOTTIME:
	case KERN_POSIX1:
	case KERN_NGROUPS:
	case KERN_JOB_CONTROL:
	case KERN_SAVED_IDS:
	case KERN_OSRELDATE:
	case KERN_DUMMY:
		return (AUE_SYSCTL_NONADMIN);

	/* only treat the changeable controls as admin */
	case KERN_MAXVNODES:
	case KERN_MAXPROC:
	case KERN_MAXFILES:
	case KERN_MAXPROCPERUID:
	case KERN_MAXFILESPERPROC:
	case KERN_HOSTID:
	case KERN_SECURELVL:
	case KERN_HOSTNAME:
	case KERN_VNODE:
	case KERN_PROC:
	case KERN_FILE:
	case KERN_PROF:
	case KERN_NISDOMAINNAME:
	case KERN_UPDATEINTERVAL:
	case KERN_NTP_PLL:
	case KERN_BOOTFILE:
	case KERN_DUMPDEV:
	case KERN_IPC:
	case KERN_PS_STRINGS:
	case KERN_USRSTACK:
	case KERN_LOGSIGEXIT:
	case KERN_IOV_MAX:
		return ((valid_arg & ARG_VALUE) ?
		    AUE_SYSCTL : AUE_SYSCTL_NONADMIN);

	default:
		return (AUE_SYSCTL);
	}
	/* NOTREACHED */
}

/*
 * Convert an open flags specifier into a specific type of open event for
 * auditing purposes.
 *
 * The masked flags are compared for exact equality against the aoe_flags of
 * each aue_open[] entry; the first match supplies the event and AUE_OPEN is
 * the fallback.  The error argument is not consulted here.
 */
au_event_t
audit_flags_and_error_to_openevent(int oflags, int error)
{
	int i;

	/*
	 * Need to check only those flags we care about.
	 */
	oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY);
	for (i = 0; i < nitems(aue_open); i++) {
		if (aue_open[i].aoe_flags == oflags)
			return (aue_open[i].aoe_event);
	}
	return (AUE_OPEN);
}

/*
 * Same conversion using the aue_openat[] table; AUE_OPENAT is the fallback
 * when no entry matches.  The error argument is not consulted here.
 */
au_event_t
audit_flags_and_error_to_openatevent(int oflags, int error)
{
	int i;

	/*
	 * Need to check only those flags we care about.
	 */
	oflags = oflags & (O_RDONLY | O_CREAT | O_TRUNC | O_RDWR | O_WRONLY);
	for (i = 0; i < nitems(aue_openat); i++) {
		if (aue_openat[i].aoe_flags == oflags)
			return (aue_openat[i].aoe_event);
	}
	return (AUE_OPENAT);
}

/*
 * Convert a MSGCTL command to a specific event.  Commands other than
 * IPC_RMID, IPC_SET and IPC_STAT map to the generic AUE_MSGCTL event.
 */
au_event_t
audit_msgctl_to_event(int cmd)
{

	switch (cmd) {
	case IPC_RMID:
		return (AUE_MSGCTL_RMID);

	case IPC_SET:
		return (AUE_MSGCTL_SET);

	case IPC_STAT:
		return (AUE_MSGCTL_STAT);

	default:
		/* We will audit a bad command. */
		return (AUE_MSGCTL);
	}
}

/*
 * Convert a SEMCTL command to a specific event.  Commands not listed below
 * map to the generic AUE_SEMCTL event.
 */
au_event_t
audit_semctl_to_event(int cmd)
{

	switch (cmd) {
	case GETALL:
		return (AUE_SEMCTL_GETALL);

	case GETNCNT:
		return (AUE_SEMCTL_GETNCNT);

	case GETPID:
		return (AUE_SEMCTL_GETPID);

	case GETVAL:
		return (AUE_SEMCTL_GETVAL);

	case GETZCNT:
		return (AUE_SEMCTL_GETZCNT);

	case IPC_RMID:
		return (AUE_SEMCTL_RMID);

	case IPC_SET:
		return (AUE_SEMCTL_SET);

	case SETALL:
		return (AUE_SEMCTL_SETALL);

	case SETVAL:
		return (AUE_SEMCTL_SETVAL);

	case IPC_STAT:
		return (AUE_SEMCTL_STAT);

	default:
		/* We will audit a bad command. */
		return (AUE_SEMCTL);
	}
+}
+
+/*
+ * Convert msgsys(2), semsys(2), and shmsys(2) system-call variations into
+ * audit events, if possible.
+ *
+ * Each function uses its argument as an index into the matching table
+ * (aue_msgsys[], aue_semsys[], aue_shmsys[]) after checking it against the
+ * corresponding *_count; a negative or too-large index falls back to the
+ * generic AUE_MSGSYS, AUE_SEMSYS or AUE_SHMSYS event.
+ */
+au_event_t
+audit_msgsys_to_event(int which)
+{
+
+	if ((which >= 0) && (which < aue_msgsys_count))
+		return (aue_msgsys[which]);
+
+	/* Audit a bad command. */
+	return (AUE_MSGSYS);
+}
+
+au_event_t
+audit_semsys_to_event(int which)
+{
+
+	if ((which >= 0) && (which < aue_semsys_count))
+		return (aue_semsys[which]);
+
+	/* Audit a bad command. */
+	return (AUE_SEMSYS);
+}
+
+au_event_t
+audit_shmsys_to_event(int which)
+{
+
+	if ((which >= 0) && (which < aue_shmsys_count))
+		return (aue_shmsys[which]);
+
+	/* Audit a bad command. */
+	return (AUE_SHMSYS);
}

/*
 * Convert a command for the auditon() system call to an audit event.
 * Commands without a dedicated event, including those listed explicitly
 * ahead of the default label, map to the generic AUE_AUDITON event.
 */
au_event_t
auditon_command_event(int cmd)
{

	switch(cmd) {
	case A_GETPOLICY:
		return (AUE_AUDITON_GPOLICY);

	case A_SETPOLICY:
		return (AUE_AUDITON_SPOLICY);

	case A_GETKMASK:
		return (AUE_AUDITON_GETKMASK);

	case A_SETKMASK:
		return (AUE_AUDITON_SETKMASK);

	case A_GETQCTRL:
		return (AUE_AUDITON_GQCTRL);

	case A_SETQCTRL:
		return (AUE_AUDITON_SQCTRL);

	case A_GETCWD:
		return (AUE_AUDITON_GETCWD);

	case A_GETCAR:
		return (AUE_AUDITON_GETCAR);

	case A_GETSTAT:
		return (AUE_AUDITON_GETSTAT);

	case A_SETSTAT:
		return (AUE_AUDITON_SETSTAT);

	case A_SETUMASK:
		return (AUE_AUDITON_SETUMASK);

	case A_SETSMASK:
		return (AUE_AUDITON_SETSMASK);

	case A_GETCOND:
		return (AUE_AUDITON_GETCOND);

	case A_SETCOND:
		return (AUE_AUDITON_SETCOND);

	case A_GETCLASS:
		return (AUE_AUDITON_GETCLASS);

	case A_SETCLASS:
		return (AUE_AUDITON_SETCLASS);

	case A_GETPINFO:
	case A_SETPMASK:
	case A_SETFSIZE:
	case A_GETFSIZE:
	case A_GETPINFO_ADDR:
	case A_GETKAUDIT:
	case A_SETKAUDIT:
	default:
		return (AUE_AUDITON);	/* No special record */
	}
}

/*
 * Create a canonical path from given path by prefixing either the root
 * directory, or the current working directory. If the process working
 * directory is NULL, we could use 'rootvnode' to obtain the root directory,
 * but this results in a volfs name written to the audit log. So we will
 * leave the filename starting with '/' in the audit log in this case.
*/
void
audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
{
	struct vnode *cvnp, *rvnp;
	char *rbuf, *fbuf, *copy;
	struct filedesc *fdp;
	struct sbuf sbf;
	cap_rights_t rights;
	int error, needslash;

	/*
	 * Outline: under the shared filedesc lock, take holds on the alternate
	 * root vnode (rvnp, if any) and on the directory a relative path starts
	 * from (cvnp, if any); after unlocking, append their full paths and
	 * then the supplied path to cpath through a fixed-length sbuf.  Every
	 * failure path leaves cpath as the empty string.
	 */
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
	    __func__, __FILE__, __LINE__);

	copy = path;
	rvnp = cvnp = NULL;
	fdp = td->td_proc->p_fd;
	FILEDESC_SLOCK(fdp);
	/*
	 * Make sure that we handle the chroot(2) case. If there is an
	 * alternate root directory, prepend it to the audited pathname.
	 */
	if (fdp->fd_rdir != NULL && fdp->fd_rdir != rootvnode) {
		rvnp = fdp->fd_rdir;
		vhold(rvnp);
	}
	/*
	 * If the supplied path is relative, make sure we capture the current
	 * working directory so we can prepend it to the supplied relative
	 * path.
	 */
	if (*path != '/') {
		if (dirfd == AT_FDCWD) {
			cvnp = fdp->fd_cdir;
			vhold(cvnp);
		} else {
			/* XXX: fgetvp() that vhold()s vnode instead of vref()ing it would be better */
			error = fgetvp(td, dirfd, cap_rights_init(&rights), &cvnp);
			if (error) {
				/*
				 * Lookup of dirfd failed: unlock, report an
				 * empty path and undo the hold on the root
				 * vnode taken above.
				 */
				FILEDESC_SUNLOCK(fdp);
				cpath[0] = '\0';
				if (rvnp != NULL)
					vdrop(rvnp);
				return;
			}
			/*
			 * Take a hold and then vrele() the vnode, so that both
			 * branches leave cvnp held and the vdrop() calls below
			 * apply to either case.
			 */
			vhold(cvnp);
			vrele(cvnp);
		}
		/* No separator when starting from the process root directory. */
		needslash = (fdp->fd_rdir != cvnp);
	} else {
		needslash = 1;
	}
	FILEDESC_SUNLOCK(fdp);
	/*
	 * NB: We require that the supplied array be at least MAXPATHLEN bytes
	 * long. If this is not the case, then we can run into serious trouble.
	 */
	(void) sbuf_new(&sbf, cpath, MAXPATHLEN, SBUF_FIXEDLEN);
	/*
	 * Strip leading forward slashes.
	 */
	while (*copy == '/')
		copy++;
	/*
	 * Make sure we handle chroot(2) and prepend the global path to these
	 * environments.
	 *
	 * NB: vn_fullpath(9) on FreeBSD is less reliable than vn_getpath(9)
	 * on Darwin. As a result, this may need some additional attention
	 * in the future.
	 */
	if (rvnp != NULL) {
		error = vn_fullpath_global(td, rvnp, &rbuf, &fbuf);
		vdrop(rvnp);
		if (error) {
			cpath[0] = '\0';
			/* Do not leak the hold on the starting directory. */
			if (cvnp != NULL)
				vdrop(cvnp);
			return;
		}
		/* rbuf is the string that gets appended; fbuf is what is freed. */
		(void) sbuf_cat(&sbf, rbuf);
		free(fbuf, M_TEMP);
	}
	if (cvnp != NULL) {
		error = vn_fullpath(td, cvnp, &rbuf, &fbuf);
		vdrop(cvnp);
		if (error) {
			cpath[0] = '\0';
			return;
		}
		(void) sbuf_cat(&sbf, rbuf);
		free(fbuf, M_TEMP);
	}
	if (needslash)
		(void) sbuf_putc(&sbf, '/');
	/*
	 * Now that we have processed any alternate root and relative path
	 * names, add the supplied pathname.
	 */
	(void) sbuf_cat(&sbf, copy);
	/*
	 * One or more of the previous sbuf operations could have resulted in
	 * the supplied buffer being overflowed. Check to see if this is the
	 * case.
	 */
	if (sbuf_error(&sbf) != 0) {
		cpath[0] = '\0';
		return;
	}
	sbuf_finish(&sbf);
}
Index: head/sys/security/audit/audit_private.h
===================================================================
--- head/sys/security/audit/audit_private.h (revision 316184)
+++ head/sys/security/audit/audit_private.h (revision 316185)
@@ -1,503 +1,508 @@
/*-
 * Copyright (c) 1999-2009 Apple Inc.
 * Copyright (c) 2016-2017 Robert N. M. Watson
 * All rights reserved.
 *
 * Portions of this software were developed by BAE Systems, the University of
 * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
 * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
 * Computing (TC) research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Apple Inc.
("Apple") nor the names of * its contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * This include file contains function prototypes and type definitions used * within the audit implementation. */ #ifndef _SECURITY_AUDIT_PRIVATE_H_ #define _SECURITY_AUDIT_PRIVATE_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif #include #include #include #include #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_AUDITBSM); MALLOC_DECLARE(M_AUDITDATA); MALLOC_DECLARE(M_AUDITPATH); MALLOC_DECLARE(M_AUDITTEXT); MALLOC_DECLARE(M_AUDITGIDSET); #endif /* * Audit control variables that are usually set/read via system calls and * used to control various aspects of auditing. */ extern struct au_qctrl audit_qctrl; extern struct audit_fstat audit_fstat; extern struct au_mask audit_nae_mask; extern int audit_panic_on_write_fail; extern int audit_fail_stop; extern int audit_argv; extern int audit_arge; /* * Success/failure conditions for the conversion of a kernel audit record to * BSM format. */ #define BSM_SUCCESS 0 #define BSM_FAILURE 1 #define BSM_NOAUDIT 2 /* * Defines for the kernel audit record k_ar_commit field. 
Flags are set to
 * indicate what sort of record it is, and which preselection mechanism
 * selected it.
 */
/* AR_COMMIT_* bits. */
#define	AR_COMMIT_KERNEL	0x00000001U
#define	AR_COMMIT_USER		0x00000010U

/* AR_PRESELECT_* bits; each name carries the mechanism it refers to. */
#define	AR_PRESELECT_TRAIL	0x00001000U
#define	AR_PRESELECT_PIPE	0x00002000U

#define	AR_PRESELECT_USER_TRAIL	0x00004000U
#define	AR_PRESELECT_USER_PIPE	0x00008000U

#define	AR_PRESELECT_DTRACE	0x00010000U

/*
 * Audit data is generated as a stream of struct audit_record structures,
 * linked by struct kaudit_record, and contain storage for possible audit so
 * that it will not need to be allocated during the processing of a system
 * call, both improving efficiency and avoiding sleeping at untimely moments.
 * This structure is converted to BSM format before being written to disk.
 */
/*
 * NOTE(review): presumably a snapshot of the attributes of an audited vnode;
 * confirm against the code that fills it in.
 */
struct vnode_au_info {
	mode_t	vn_mode;
	uid_t	vn_uid;
	gid_t	vn_gid;
	dev_t	vn_dev;
	long	vn_fsid;
	long	vn_fileid;
	long	vn_gen;
};

/*
 * A set of group IDs.  NOTE(review): gidset_size is presumably the number of
 * entries gidset points to rather than a byte count; verify at the callers.
 */
struct groupset {
	gid_t	*gidset;
	u_int	gidset_size;
};

/* Socket description captured for an audit record. */
struct socket_au_info {
	int		so_domain;
	int		so_type;
	int		so_protocol;
	in_addr_t	so_raddr;	/* Remote address if INET socket. */
	in_addr_t	so_laddr;	/* Local address if INET socket. */
	u_short		so_rport;	/* Remote port. */
	u_short		so_lport;	/* Local port. */
};

/*
 * The following is used for A_OLDSETQCTRL and AU_OLDGETQCTRL and a 64-bit
 * userland.
*/
struct au_qctrl64 {
	u_int64_t	aq64_hiwater;
	u_int64_t	aq64_lowater;
	u_int64_t	aq64_bufsz;
	u_int64_t	aq64_delay;
	u_int64_t	aq64_minfree;
};
typedef	struct au_qctrl64	au_qctrl64_t;

/*
 * Union of the argument payloads that can be stored in the ar_arg_auditon
 * member of struct audit_record.  NOTE(review): which member is meaningful
 * presumably depends on the auditon() command; confirm at the call sites.
 */
union auditon_udata {
	char			*au_path;
	int			au_cond;
	int			au_flags;
	int			au_policy;
	int			au_trigger;
	int64_t			au_cond64;
	int64_t			au_policy64;
	au_evclass_map_t	au_evclass;
	au_mask_t		au_mask;
	auditinfo_t		au_auinfo;
	auditpinfo_t		au_aupinfo;
	auditpinfo_addr_t	au_aupinfo_addr;
	au_qctrl_t		au_qctrl;
	au_qctrl64_t		au_qctrl64;
	au_stat_t		au_stat;
	au_fstat_t		au_fstat;
	auditinfo_addr_t	au_kau_info;
	au_evname_map_t		au_evname;
};

/* Owner, group and mode triple stored in ar_arg_pipc_perm. */
struct posix_ipc_perm {
	uid_t	pipc_uid;
	gid_t	pipc_gid;
	mode_t	pipc_mode;
};

/*
 * Kernel audit record: a header, the subject's identity, and one slot per
 * kind of argument that may be recorded.  ar_valid_arg is the bitmask that
 * says which of the ar_arg_* slots have been filled in.
 */
struct audit_record {
	/* Audit record header. */
	u_int32_t		ar_magic;
	int			ar_event;
	int			ar_retval; /* value returned to the process */
	int			ar_errno;  /* return status of system call */
	struct timespec		ar_starttime;
	struct timespec		ar_endtime;
	u_int64_t		ar_valid_arg;  /* Bitmask of valid arguments */

	/* Audit subject information. */
	struct xucred		ar_subj_cred;
	uid_t			ar_subj_ruid;
	gid_t			ar_subj_rgid;
	gid_t			ar_subj_egid;
	uid_t			ar_subj_auid; /* Audit user ID */
	pid_t			ar_subj_asid; /* Audit session ID */
	pid_t			ar_subj_pid;
	struct au_tid		ar_subj_term;
	struct au_tid_addr	ar_subj_term_addr;
	struct au_mask		ar_subj_amask;

	/* Operation arguments. */
	uid_t			ar_arg_euid;
	uid_t			ar_arg_ruid;
	uid_t			ar_arg_suid;
	gid_t			ar_arg_egid;
	gid_t			ar_arg_rgid;
	gid_t			ar_arg_sgid;
	pid_t			ar_arg_pid;
	pid_t			ar_arg_asid;
	struct au_tid		ar_arg_termid;
	struct au_tid_addr	ar_arg_termid_addr;
	uid_t			ar_arg_uid;
	uid_t			ar_arg_auid;
	gid_t			ar_arg_gid;
	struct groupset		ar_arg_groups;
	int			ar_arg_fd;
	int			ar_arg_atfd1;
	int			ar_arg_atfd2;
	int			ar_arg_fflags;
	mode_t			ar_arg_mode;
	int			ar_arg_dev;
	long			ar_arg_value;
	void			*ar_arg_addr;
	int			ar_arg_len;
	int			ar_arg_mask;
	u_int			ar_arg_signum;
	char			ar_arg_login[MAXLOGNAME];
	int			ar_arg_ctlname[CTL_MAXNAME];
	struct socket_au_info	ar_arg_sockinfo;
	char			*ar_arg_upath1;
	char			*ar_arg_upath2;
	char			*ar_arg_text;
	struct au_mask		ar_arg_amask;
	struct vnode_au_info	ar_arg_vnode1;
	struct vnode_au_info	ar_arg_vnode2;
	int			ar_arg_cmd;
+	int			ar_arg_svipc_which;
	int			ar_arg_svipc_cmd;
	struct ipc_perm		ar_arg_svipc_perm;
	int			ar_arg_svipc_id;
	void			*ar_arg_svipc_addr;
	struct posix_ipc_perm	ar_arg_pipc_perm;
	union auditon_udata	ar_arg_auditon;
	char			*ar_arg_argv;
	int			ar_arg_argc;
	char			*ar_arg_envv;
	int			ar_arg_envc;
	int			ar_arg_exitstatus;
	int			ar_arg_exitretval;
	struct sockaddr_storage ar_arg_sockaddr;
	cap_rights_t		ar_arg_rights;
	uint32_t		ar_arg_fcntl_rights;
	char			ar_jailname[MAXHOSTNAMELEN];
};

/*
 * Arguments in the audit record are initially not defined; flags are set to
 * indicate if they are present so they can be included in the audit log
 * stream only if defined.
*/ #define ARG_EUID 0x0000000000000001ULL #define ARG_RUID 0x0000000000000002ULL #define ARG_SUID 0x0000000000000004ULL #define ARG_EGID 0x0000000000000008ULL #define ARG_RGID 0x0000000000000010ULL #define ARG_SGID 0x0000000000000020ULL #define ARG_PID 0x0000000000000040ULL #define ARG_UID 0x0000000000000080ULL #define ARG_AUID 0x0000000000000100ULL #define ARG_GID 0x0000000000000200ULL #define ARG_FD 0x0000000000000400ULL #define ARG_POSIX_IPC_PERM 0x0000000000000800ULL #define ARG_FFLAGS 0x0000000000001000ULL #define ARG_MODE 0x0000000000002000ULL #define ARG_DEV 0x0000000000004000ULL #define ARG_ADDR 0x0000000000008000ULL #define ARG_LEN 0x0000000000010000ULL #define ARG_MASK 0x0000000000020000ULL #define ARG_SIGNUM 0x0000000000040000ULL #define ARG_LOGIN 0x0000000000080000ULL #define ARG_SADDRINET 0x0000000000100000ULL #define ARG_SADDRINET6 0x0000000000200000ULL #define ARG_SADDRUNIX 0x0000000000400000ULL #define ARG_TERMID_ADDR 0x0000000000400000ULL #define ARG_UNUSED2 0x0000000001000000ULL #define ARG_UPATH1 0x0000000002000000ULL #define ARG_UPATH2 0x0000000004000000ULL #define ARG_TEXT 0x0000000008000000ULL #define ARG_VNODE1 0x0000000010000000ULL #define ARG_VNODE2 0x0000000020000000ULL #define ARG_SVIPC_CMD 0x0000000040000000ULL #define ARG_SVIPC_PERM 0x0000000080000000ULL #define ARG_SVIPC_ID 0x0000000100000000ULL #define ARG_SVIPC_ADDR 0x0000000200000000ULL #define ARG_GROUPSET 0x0000000400000000ULL #define ARG_CMD 0x0000000800000000ULL #define ARG_SOCKINFO 0x0000001000000000ULL #define ARG_ASID 0x0000002000000000ULL #define ARG_TERMID 0x0000004000000000ULL #define ARG_AUDITON 0x0000008000000000ULL #define ARG_VALUE 0x0000010000000000ULL #define ARG_AMASK 0x0000020000000000ULL #define ARG_CTLNAME 0x0000040000000000ULL #define ARG_PROCESS 0x0000080000000000ULL #define ARG_MACHPORT1 0x0000100000000000ULL #define ARG_MACHPORT2 0x0000200000000000ULL #define ARG_EXIT 0x0000400000000000ULL #define ARG_IOVECSTR 0x0000800000000000ULL #define ARG_ARGV 
0x0001000000000000ULL #define ARG_ENVV 0x0002000000000000ULL #define ARG_ATFD1 0x0004000000000000ULL #define ARG_ATFD2 0x0008000000000000ULL #define ARG_RIGHTS 0x0010000000000000ULL #define ARG_FCNTL_RIGHTS 0x0020000000000000ULL +#define ARG_SVIPC_WHICH 0x0200000000000000ULL #define ARG_NONE 0x0000000000000000ULL #define ARG_ALL 0xFFFFFFFFFFFFFFFFULL #define ARG_IS_VALID(kar, arg) ((kar)->k_ar.ar_valid_arg & (arg)) #define ARG_SET_VALID(kar, arg) do { \ (kar)->k_ar.ar_valid_arg |= (arg); \ } while (0) #define ARG_CLEAR_VALID(kar, arg) do { \ (kar)->k_ar.ar_valid_arg &= ~(arg); \ } while (0) /* * In-kernel version of audit record; the basic record plus queue meta-data. * This record can also have a pointer set to some opaque data that will be * passed through to the audit writing mechanism. */ struct kaudit_record { struct audit_record k_ar; u_int32_t k_ar_commit; void *k_udata; /* User data. */ u_int k_ulen; /* User data length. */ struct uthread *k_uthread; /* Audited thread. */ #ifdef KDTRACE_HOOKS void *k_dtaudit_state; #endif TAILQ_ENTRY(kaudit_record) k_q; }; TAILQ_HEAD(kaudit_queue, kaudit_record); /* * Functions to manage the allocation, release, and commit of kernel audit * records. */ void audit_abort(struct kaudit_record *ar); void audit_commit(struct kaudit_record *ar, int error, int retval); struct kaudit_record *audit_new(int event, struct thread *td); /* * Functions relating to the conversion of internal kernel audit records to * the BSM file format. */ struct au_record; int kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau); int bsm_rec_verify(void *rec); /* * Kernel versions of the libbsm audit record functions. */ void kau_free(struct au_record *rec); void kau_init(void); /* * Return values for pre-selection and post-selection decisions. */ #define AU_PRS_SUCCESS 1 #define AU_PRS_FAILURE 2 #define AU_PRS_BOTH (AU_PRS_SUCCESS|AU_PRS_FAILURE) /* * Data structures relating to the kernel audit queue. 
Ideally, these might
 * be abstracted so that only accessor methods are exposed.
 */
extern struct mtx		audit_mtx;
extern struct cv		audit_watermark_cv;
extern struct cv		audit_worker_cv;
extern struct kaudit_queue	audit_q;
extern int			audit_q_len;
extern int			audit_pre_q_len;
extern int			audit_in_failure;

/*
 * Flags to use on audit files when opening and closing.
 */
#define	AUDIT_OPEN_FLAGS	(FWRITE | O_APPEND)
#define	AUDIT_CLOSE_FLAGS	(FWRITE | O_APPEND)

/*
 * Audit event-to-name mapping structure, maintained in audit_bsm_klib.c. It
 * appears in this header so that the DTrace audit provider can dereference
 * instances passed back in the au_evnamemap_foreach() callbacks. Safe access
 * to its fields requires holding ene_lock (after it is visible in the global
 * table).
 *
 * Locking:
 * (c) - Constant after inserted in the global table
 * (l) - Protected by ene_lock
 * (m) - Protected by evnamemap_lock (audit_bsm_klib.c)
 * (M) - Writes protected by evnamemap_lock; reads unprotected.
 */
struct evname_elem {
	au_event_t		ene_event;			/* (c) */
	char			ene_name[EVNAMEMAP_NAME_SIZE];	/* (l) */
	LIST_ENTRY(evname_elem)	ene_entry;			/* (m) */
	struct mtx		ene_lock;	/* Taken by EVNAME_LOCK(). */
#ifdef KDTRACE_HOOKS
	/* DTrace probe IDs; 0 if not yet registered. */
	uint32_t		ene_commit_probe_id;		/* (M) */
	uint32_t		ene_bsm_probe_id;		/* (M) */

	/* Flags indicating whether the probes are enabled or not. */
	int			ene_commit_probe_enabled;	/* (M) */
	int			ene_bsm_probe_enabled;		/* (M) */
#endif
};

/* Acquire and release the per-entry mutex of an evname_elem. */
#define	EVNAME_LOCK(ene)	mtx_lock(&(ene)->ene_lock)
#define	EVNAME_UNLOCK(ene)	mtx_unlock(&(ene)->ene_lock)

/*
 * Callback function type invoked with a struct evname_elem pointer.
 */
typedef	void	(*au_evnamemap_callback_t)(struct evname_elem *ene);

/*
 * DTrace audit provider (dtaudit) hooks -- to be set non-NULL when the audit
 * provider is loaded and ready to be called into.
*/
#ifdef KDTRACE_HOOKS
extern void	*(*dtaudit_hook_preselect)(au_id_t auid, au_event_t event,
		    au_class_t class);
extern int	(*dtaudit_hook_commit)(struct kaudit_record *kar,
		    au_id_t auid, au_event_t event, au_class_t class,
		    int sorf);
extern void	(*dtaudit_hook_bsm)(struct kaudit_record *kar, au_id_t auid,
		    au_event_t event, au_class_t class, int sorf,
		    void *bsm_data, size_t bsm_len);
#endif /* KDTRACE_HOOKS */

/*
 * NOTE(review): the header names of the three #include directives below are
 * missing in this copy of the file.
 */
#include
#include
#include

/*
 * Some of the BSM tokenizer functions take different parameters in the
 * kernel implementations in order to save the copying of large kernel data
 * structures. The prototypes of these functions are declared here.
 */
token_t		*kau_to_socket(struct socket_au_info *soi);

/*
 * audit_klib prototypes
 */
int		 au_preselect(au_event_t event, au_class_t class,
		    au_mask_t *mask_p, int sorf);
void		 au_evclassmap_init(void);
void		 au_evclassmap_insert(au_event_t event, au_class_t class);
au_class_t	 au_event_class(au_event_t event);
void		 au_evnamemap_init(void);
void		 au_evnamemap_insert(au_event_t event, const char *name);
void		 au_evnamemap_foreach(au_evnamemap_callback_t callback);
#ifdef KDTRACE_HOOKS
struct evname_elem	*au_evnamemap_lookup(au_event_t event);
#endif
int		 au_event_name(au_event_t event, char *name);
au_event_t	 audit_ctlname_to_sysctlevent(int name[], uint64_t valid_arg);
au_event_t	 audit_flags_and_error_to_openevent(int oflags, int error);
au_event_t	 audit_flags_and_error_to_openatevent(int oflags, int error);
au_event_t	 audit_msgctl_to_event(int cmd);
-au_event_t	 audit_semctl_to_event(int cmr);
+au_event_t	 audit_msgsys_to_event(int which);
+au_event_t	 audit_semctl_to_event(int cmd);
+au_event_t	 audit_semsys_to_event(int which);
+au_event_t	 audit_shmsys_to_event(int which);
void		 audit_canon_path(struct thread *td, int dirfd, char *path,
		    char *cpath);
au_event_t	 auditon_command_event(int cmd);

/*
 * Audit trigger events notify user space of kernel audit conditions
 * asynchronously.
*/
void		 audit_trigger_init(void);
int		 audit_send_trigger(unsigned int trigger);

/*
 * Accessor functions to manage global audit state.
 */
void		 audit_set_kinfo(struct auditinfo_addr *);
void		 audit_get_kinfo(struct auditinfo_addr *);

/*
 * General audit-related functions.
 */
struct kaudit_record	*currecord(void);
void		 audit_free(struct kaudit_record *ar);
void		 audit_shutdown(void *arg, int howto);
void		 audit_rotate_vnode(struct ucred *cred, struct vnode *vp);
void		 audit_worker_init(void);

/*
 * Audit pipe functions.
 */
int		 audit_pipe_preselect(au_id_t auid, au_event_t event,
		    au_class_t class, int sorf, int trail_select);
void		 audit_pipe_submit(au_id_t auid, au_event_t event,
		    au_class_t class, int sorf, int trail_select,
		    void *record, u_int record_len);
void		 audit_pipe_submit_user(void *record, u_int record_len);

#endif /* ! _SECURITY_AUDIT_PRIVATE_H_ */