Index: head/sys/cam/ctl/ctl_backend_block.c
===================================================================
--- head/sys/cam/ctl/ctl_backend_block.c	(revision 292383)
+++ head/sys/cam/ctl/ctl_backend_block.c	(revision 292384)
@@ -1,2880 +1,2880 @@
 /*-
  * Copyright (c) 2003 Silicon Graphics International Corp.
  * Copyright (c) 2009-2011 Spectra Logic Corporation
  * Copyright (c) 2012 The FreeBSD Foundation
  * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
  * All rights reserved.
  *
  * Portions of this software were developed by Edward Tomasz Napierala
  * under sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions, and the following disclaimer,
  *    without modification.
  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
  *    substantially similar to the "NO WARRANTY" disclaimer below
  *    ("Disclaimer") and any redistribution must be conditioned upon
  *    including a substantially similar Disclaimer requirement for further
  *    binary redistribution.
  *
  * NO WARRANTY
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGES.
  *
  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
  */
 /*
  * CAM Target Layer driver backend for block devices.
  *
  * Author: Ken Merry <ken@FreeBSD.org>
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/types.h>
 #include <sys/kthread.h>
 #include <sys/bio.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/malloc.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/endian.h>
 #include <sys/uio.h>
 #include <sys/buf.h>
 #include <sys/taskqueue.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/mount.h>
 #include <sys/disk.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/proc.h>
 #include <sys/pcpu.h>
 #include <sys/module.h>
 #include <sys/sdt.h>
 #include <sys/devicestat.h>
 #include <sys/sysctl.h>
 
 #include <geom/geom.h>
 
 #include <cam/cam.h>
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_da.h>
 #include <cam/ctl/ctl_io.h>
 #include <cam/ctl/ctl.h>
 #include <cam/ctl/ctl_backend.h>
 #include <cam/ctl/ctl_ioctl.h>
 #include <cam/ctl/ctl_ha.h>
 #include <cam/ctl/ctl_scsi_all.h>
 #include <cam/ctl/ctl_private.h>
 #include <cam/ctl/ctl_error.h>
 
 /*
  * The idea here is that we'll allocate enough S/G space to hold a 1MB
  * I/O.  If we get an I/O larger than that, we'll split it.
  *
  * The limits are expressed as two equal halves: COMPARE handling in this
  * file keeps a second S/G list starting at index CTLBLK_HALF_SEGS, so the
  * per-I/O S/G array is sized for CTLBLK_HALF_SEGS * 2 entries.
  */
 #define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
 #define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
 #define	CTLBLK_MAX_SEG		MAXPHYS
 #define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1)
 #define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
 
 /*
  * Debug tracing.  With CTLBLK_DEBUG defined, DPRINTF() prints the message
  * prefixed with the calling function name and line number; otherwise it
  * expands to an empty statement.
  */
 #ifdef CTLBLK_DEBUG
 #define DPRINTF(fmt, args...) \
     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
 #else
 #define DPRINTF(fmt, args...) do {} while(0)
 #endif
 
 /*
  * Accessors for the per-I/O private areas in the CTL I/O header.
  * PRIV(io) views the CTL_PRIV_BACKEND slot as a ctl_ptr_len_flags (its ptr
  * member holds the struct ctl_be_block_io for the I/O); ARGS(io) views the
  * CTL_PRIV_LBA_LEN slot as a ctl_lba_len_flags (LBA and CTL_LLF_* flags).
  */
 #define PRIV(io)	\
     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
 #define ARGS(io)	\
     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
 
 SDT_PROVIDER_DEFINE(cbb);
 
 /*
  * Bit flags kept in ctl_be_block_lun.flags.
  * NOTE(review): the code that sets and tests these bits is outside this
  * view; the names suggest LUN configuration state -- confirm there.
  */
 typedef enum {
 	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
 	CTL_BE_BLOCK_LUN_CONFIG_ERR	= 0x02,
 	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
 } ctl_be_block_lun_flags;
 
 /*
  * Kind of backing store, kept in ctl_be_block_lun.dev_type.
  */
 typedef enum {
 	CTL_BE_BLOCK_NONE,
 	CTL_BE_BLOCK_DEV,
 	CTL_BE_BLOCK_FILE
 } ctl_be_block_type;
 
 /*
  * State specific to file-backed LUNs.
  */
 struct ctl_be_block_filedata {
 	struct ucred *cred;	/* credentials passed to VOP_READ/VOP_WRITE */
 };
 
 /*
  * Backend-type-specific state; currently only the file variant exists.
  */
 union ctl_be_block_bedata {
 	struct ctl_be_block_filedata file;
 };
 
 struct ctl_be_block_io;
 struct ctl_be_block_lun;
 
 /*
  * Per-backend method signatures stored in struct ctl_be_block_lun:
  * cbb_dispatch_t processes one backend I/O on a LUN, cbb_getattr_t
  * returns the value of a named LUN attribute.
  */
 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
 			       struct ctl_be_block_io *beio);
 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
 				  const char *attrname);
 
 /*
  * Backend LUN structure.  There is a 1:1 mapping between a block device
  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
  */
 struct ctl_be_block_lun {
 	struct ctl_lun_create_params params;
 	char lunname[32];
 	char *dev_path;
 	ctl_be_block_type dev_type;
 	struct vnode *vn;		/* backing vnode used for all I/O */
 	union ctl_be_block_bedata backend;
 	/* Backend-type-specific handlers (assigned outside this view). */
 	cbb_dispatch_t dispatch;
 	cbb_dispatch_t lun_flush;
 	cbb_dispatch_t unmap;
 	cbb_dispatch_t get_lba_status;
 	cbb_getattr_t getattr;
 	uma_zone_t lun_zone;		/* zone S/G data buffers are freed to */
 	uint64_t size_blocks;
 	uint64_t size_bytes;
 	struct ctl_be_block_softc *softc;
 	struct devstat *disk_stats;	/* devstat transactions, under io_lock */
 	ctl_be_block_lun_flags flags;
 	STAILQ_ENTRY(ctl_be_block_lun) links;
 	struct ctl_be_lun cbe_lun;
 	struct taskqueue *io_taskqueue;	/* runs io_task for queued I/O */
 	struct task io_task;
 	int num_threads;
 	STAILQ_HEAD(, ctl_io_hdr) input_queue;
 	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
 	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
 	/* Writes whose data move finished, waiting for backend I/O. */
 	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
 	/* Held around devstat calls and the per-I/O bio counters. */
 	struct mtx_padalign io_lock;
 	/* Held around datamove_queue insertion in the move-done path. */
 	struct mtx_padalign queue_lock;
 };
 
 /*
  * Overall softc structure for the block backend module.
  */
 struct ctl_be_block_softc {
 	struct mtx			 lock;
 	int				 num_luns;
 	STAILQ_HEAD(, ctl_be_block_lun)	 lun_list;	/* linked via 'links' */
 };
 
 /* The single module-wide softc instance. */
 static struct ctl_be_block_softc backend_block_softc;
 
 /*
  * Per-I/O information.
  */
 struct ctl_be_block_io {
 	union ctl_io			*io;	/* CTL I/O this beio serves */
 	/*
 	 * Data buffers.  For COMPARE the second list starts at index
 	 * CTLBLK_HALF_SEGS.
 	 */
 	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
 	/* iovec array the file/zvol paths fill from sg_segs for their uio. */
 	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
 	int				bio_cmd;	/* BIO_READ, BIO_WRITE, ... */
 	int				num_segs;
 	/* bio accounting; updated under the LUN's io_lock. */
 	int				num_bios_sent;
 	int				num_bios_done;
 	int				send_complete;	/* all bios submitted */
 	int				num_errors;
 	/* devstat bookkeeping for the transaction. */
 	struct bintime			ds_t0;
 	devstat_tag_type		ds_tag_type;
 	devstat_trans_flags		ds_trans_type;
 	uint64_t			io_len;		/* bytes */
 	/* Byte offset; the unmap path treats -1 as "use descriptor list". */
 	uint64_t			io_offset;
 	/* The file flush path uses non-zero to select MNT_NOWAIT. */
 	int				io_arg;
 	struct ctl_be_block_softc	*softc;
 	struct ctl_be_block_lun		*lun;
 	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
 };
 
 extern struct ctl_softc *control_softc;
 
 /*
  * kern.cam.ctl.block.num_threads: read/write tunable, default 14,
  * described as the number of threads per backing file.
  */
 static int cbb_num_threads = 14;
 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD, 0,
 	    "CAM Target Layer Block Backend");
 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
            &cbb_num_threads, 0, "Number of threads per backing file");
 
 /*
  * Forward declarations.  Everything is file-local except
  * ctl_be_block_init(), which is referenced by the driver descriptor below.
  */
 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
 static void ctl_free_beio(struct ctl_be_block_io *beio);
 static void ctl_complete_beio(struct ctl_be_block_io *beio);
 static int ctl_be_block_move_done(union ctl_io *io);
 static void ctl_be_block_biodone(struct bio *bio);
 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
 				    struct ctl_be_block_io *beio);
 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
 				       struct ctl_be_block_io *beio);
 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
 				  struct ctl_be_block_io *beio);
 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
 					 const char *attrname);
 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
 				   struct ctl_be_block_io *beio);
 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
 				   struct ctl_be_block_io *beio);
 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
 				      struct ctl_be_block_io *beio);
 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
 					 const char *attrname);
 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
 				    union ctl_io *io);
 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
 				    union ctl_io *io);
 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
 				  union ctl_io *io);
 static void ctl_be_block_worker(void *context, int pending);
 static int ctl_be_block_submit(union ctl_io *io);
 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
 				   int flag, struct thread *td);
 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
 				  struct ctl_lun_req *req);
 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
 				 struct ctl_lun_req *req);
 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
 			     struct ctl_lun_req *req);
 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
 			       struct ctl_lun_req *req);
 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
 			   struct ctl_lun_req *req);
 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
 			   struct ctl_lun_req *req);
 static void ctl_be_block_lun_shutdown(void *be_lun);
 static void ctl_be_block_lun_config_status(void *be_lun,
 					   ctl_lun_config_status status);
 static int ctl_be_block_config_write(union ctl_io *io);
 static int ctl_be_block_config_read(union ctl_io *io);
 static int ctl_be_block_lun_info(void *be_lun, struct sbuf *sb);
 static uint64_t ctl_be_block_lun_attr(void *be_lun, const char *attrname);
 int ctl_be_block_init(void);
 
 /*
  * Backend driver descriptor registered with CTL under the name "block";
  * it wires this file's entry points into the backend callback slots.
  */
 static struct ctl_backend_driver ctl_be_block_driver = 
 {
 	.name = "block",
 	.flags = CTL_BE_FLAG_HAS_CONFIG,
 	.init = ctl_be_block_init,
 	.data_submit = ctl_be_block_submit,
 	.data_move_done = ctl_be_block_move_done,
 	.config_read = ctl_be_block_config_read,
 	.config_write = ctl_be_block_config_write,
 	.ioctl = ctl_be_block_ioctl,
 	.lun_info = ctl_be_block_lun_info,
 	.lun_attr = ctl_be_block_lun_attr
 };
 
 MALLOC_DEFINE(M_CTLBLK, "ctlblk", "Memory used for CTL block backend");
 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
 
 /* UMA zone that struct ctl_be_block_io objects are allocated from. */
 static uma_zone_t beio_zone;
 
 /*
  * Allocate a per-I/O tracking structure from beio_zone.
  *
  * The allocation is requested with M_WAITOK | M_ZERO, so the returned
  * structure is zero-filled apart from the softc back-pointer set here.
  */
 static struct ctl_be_block_io *
 ctl_alloc_beio(struct ctl_be_block_softc *softc)
 {
 	struct ctl_be_block_io *new_beio;
 
 	new_beio = uma_zalloc(beio_zone, M_WAITOK | M_ZERO);
 	new_beio->softc = softc;
 
 	return (new_beio);
 }
 
 /*
  * Release a beio: return each S/G data buffer to the LUN's zone, then the
  * beio itself to beio_zone.
  *
  * For COMPARE I/O the matching entry in the second S/G list (offset by
  * CTLBLK_HALF_SEGS) is released as well.  Segments whose address was
  * already NULL are counted and reported as duplicate frees.
  */
 static void
 ctl_free_beio(struct ctl_be_block_io *beio)
 {
 	int already_null = 0;
 	int seg;
 
 	for (seg = 0; seg < beio->num_segs; seg++) {
 		if (beio->sg_segs[seg].addr == NULL)
 			already_null++;
 
 		uma_zfree(beio->lun->lun_zone, beio->sg_segs[seg].addr);
 		beio->sg_segs[seg].addr = NULL;
 
 		/* Only COMPARE carries a second, equal S/G list. */
 		if ((ARGS(beio->io)->flags & CTL_LLF_COMPARE) == 0)
 			continue;
 
 		uma_zfree(beio->lun->lun_zone,
 		    beio->sg_segs[seg + CTLBLK_HALF_SEGS].addr);
 		beio->sg_segs[seg + CTLBLK_HALF_SEGS].addr = NULL;
 	}
 
 	if (already_null > 0) {
 		printf("%s: %d duplicate frees out of %d segments\n", __func__,
 		       already_null, beio->num_segs);
 	}
 
 	uma_zfree(beio_zone, beio);
 }
 
 /*
  * Finish processing of a beio.
  *
  * If a continuation callback is installed it takes over; otherwise the
  * beio is freed and the CTL I/O is handed back via ctl_data_submit_done().
  */
 static void
 ctl_complete_beio(struct ctl_be_block_io *beio)
 {
 	union ctl_io *io;
 
 	if (beio->beio_cont != NULL) {
 		beio->beio_cont(beio);
 		return;
 	}
 
 	/* Grab the I/O pointer before the beio goes back to its zone. */
 	io = beio->io;
 	ctl_free_beio(beio);
 	ctl_data_submit_done(io);
 }
 
 /*
  * Return the length of the common prefix of two byte buffers, i.e. the
  * index of the first differing byte, or 'size' if the first 'size' bytes
  * are all equal.
  */
 static size_t
 cmp(uint8_t *a, uint8_t *b, size_t size)
 {
 	size_t matched = 0;
 
 	while (matched < size && a[matched] == b[matched])
 		matched++;
 
 	return (matched);
 }
 
 /*
  * Compare the two S/G lists of a COMPARE I/O segment by segment.
  *
  * Segment i of the first list is checked against segment
  * i + CTLBLK_HALF_SEGS.  On the first difference, MISCOMPARE sense data is
  * set with the byte offset of the mismatch in the INFORMATION field;
  * if every segment matches, the I/O is marked successful.
  */
 static void
 ctl_be_block_compare(union ctl_io *io)
 {
 	struct ctl_be_block_io *beio;
 	uint64_t mismatch_off, matched;
 	uint8_t info[8];
 	int seg;
 
 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 	mismatch_off = 0;
 	seg = 0;
 	while (seg < beio->num_segs) {
 		matched = cmp(beio->sg_segs[seg].addr,
 		    beio->sg_segs[seg + CTLBLK_HALF_SEGS].addr,
 		    beio->sg_segs[seg].len);
 		mismatch_off += matched;
 		/* A short match means this segment holds the difference. */
 		if (matched < beio->sg_segs[seg].len)
 			break;
 		seg++;
 	}
 
 	if (seg >= beio->num_segs) {
 		ctl_set_success(&io->scsiio);
 		return;
 	}
 
 	scsi_u64to8b(mismatch_off, info);
 	ctl_set_sense(&io->scsiio, /*current_error*/ 1,
 	    /*sense_key*/ SSD_KEY_MISCOMPARE,
 	    /*asc*/ 0x1D, /*ascq*/ 0x00,
 	    /*type*/ SSD_ELEM_INFO,
 	    /*size*/ sizeof(info), /*data*/ &info,
 	    /*type*/ SSD_ELEM_NONE);
 }
 
 /*
  * Data-move completion callback (installed as .data_move_done).
  *
  * Accounts for the finished DMA, sets status for reads, compares and
  * transfer errors, and then either completes the beio or -- for a write
  * whose data arrived without error -- queues the I/O on the LUN's
  * datamove_queue and kicks the task queue so the backend write is issued
  * from a context that may block.  Always returns 0.
  */
 static int
 ctl_be_block_move_done(union ctl_io *io)
 {
 	struct ctl_be_block_io *beio;
 	struct ctl_be_block_lun *be_lun;
 	struct ctl_lba_len_flags *lbalen;
 #ifdef CTL_TIME_IO
 	struct bintime cur_bt;
 #endif
 
 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 	be_lun = beio->lun;
 
 	DPRINTF("entered\n");
 
 #ifdef CTL_TIME_IO
 	/* Add the time since dma_start_bt to the I/O's accumulated DMA time. */
 	getbinuptime(&cur_bt);
 	bintime_sub(&cur_bt, &io->io_hdr.dma_start_bt);
 	bintime_add(&io->io_hdr.dma_bt, &cur_bt);
 #endif
 	io->io_hdr.num_dmas++;
 	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
 
 	/*
 	 * We set status at this point for read commands, and write
 	 * commands with errors.
 	 */
 	if (io->io_hdr.flags & CTL_FLAG_ABORT) {
 		/* Aborted: leave the status untouched. */
 		;
 	} else if ((io->io_hdr.port_status == 0) &&
 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE)) {
 		lbalen = ARGS(beio->io);
 		if (lbalen->flags & CTL_LLF_READ) {
 			ctl_set_success(&io->scsiio);
 		} else if (lbalen->flags & CTL_LLF_COMPARE) {
 			/* We have two data blocks ready for comparison. */
 			ctl_be_block_compare(io);
 		}
 	} else if ((io->io_hdr.port_status != 0) &&
 	    ((io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE ||
 	     (io->io_hdr.status & CTL_STATUS_MASK) == CTL_SUCCESS)) {
 		/*
 		 * For hardware error sense keys, the sense key
 		 * specific value is defined to be a retry count,
 		 * but we use it to pass back an internal FETD
 		 * error code.  XXX KDM  Hopefully the FETD is only
 		 * using 16 bits for an error code, since that's
 		 * all the space we have in the sks field.
 		 */
 		ctl_set_internal_failure(&io->scsiio,
 					 /*sks_valid*/ 1,
 					 /*retry_count*/
 					 io->io_hdr.port_status);
 	}
 
 	/*
 	 * If this is a read, or a write with errors, it is done.
 	 */
 	if ((beio->bio_cmd == BIO_READ)
 	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
 	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
 		ctl_complete_beio(beio);
 		return (0);
 	}
 
 	/*
 	 * At this point, we have a write and the DMA completed
 	 * successfully.  We now have to queue it to the task queue to
 	 * execute the backend I/O.  That is because we do blocking
 	 * memory allocations, and in the file backing case, blocking I/O.
 	 * This move done routine is generally called in the SIM's
 	 * interrupt context, and therefore we cannot block.
 	 */
 	mtx_lock(&be_lun->queue_lock);
 	STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
 	mtx_unlock(&be_lun->queue_lock);
 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
 
 	return (0);
 }
 
 /*
  * bio completion handler (set as bio_done on the bios built in this file).
  *
  * Counts the finished bio under io_lock and returns early until the
  * submitter has set send_complete and every bio sent has completed.  The
  * last completion ends the devstat transaction, translates any failure
  * into SCSI sense data, and then either completes the beio (write, flush,
  * delete, verify, or error) or starts the data move for a read.
  */
 static void
 ctl_be_block_biodone(struct bio *bio)
 {
 	struct ctl_be_block_io *beio;
 	struct ctl_be_block_lun *be_lun;
 	union ctl_io *io;
 	int error;
 
 	beio = bio->bio_caller1;
 	be_lun = beio->lun;
 	io = beio->io;
 
 	DPRINTF("entered\n");
 
 	/* Capture the errno now; the bio is destroyed below. */
 	error = bio->bio_error;
 	mtx_lock(&be_lun->io_lock);
 	if (error != 0)
 		beio->num_errors++;
 
 	beio->num_bios_done++;
 
 	/*
 	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
 	 * during the free might cause it to complain.
 	 */
 	g_destroy_bio(bio);
 
 	/*
 	 * If the send complete bit isn't set, or we aren't the last I/O to
 	 * complete, then we're done.
 	 */
 	if ((beio->send_complete == 0)
 	 || (beio->num_bios_done < beio->num_bios_sent)) {
 		mtx_unlock(&be_lun->io_lock);
 		return;
 	}
 
 	/*
 	 * At this point, we've verified that we are the last I/O to
 	 * complete, so it's safe to drop the lock.
 	 */
 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
 	    beio->ds_tag_type, beio->ds_trans_type,
 	    /*now*/ NULL, /*then*/&beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	/*
 	 * If there are any errors from the backing device, we fail the
 	 * entire I/O, choosing the sense data from the errno: invalid
 	 * opcode, space allocation failure, write protected, internal
 	 * failure (flush), or otherwise a medium error.
 	 *
 	 * NOTE(review): 'error' is the errno of the bio that completed
 	 * last.  If an earlier bio failed but this one did not, error is 0
 	 * here and the flush/medium-error branches are taken regardless of
 	 * the earlier errno.
 	 */
 	if (beio->num_errors > 0) {
 		if (error == EOPNOTSUPP) {
 			ctl_set_invalid_opcode(&io->scsiio);
 		} else if (error == ENOSPC || error == EDQUOT) {
 			ctl_set_space_alloc_fail(&io->scsiio);
 		} else if (error == EROFS || error == EACCES) {
 			ctl_set_hw_write_protected(&io->scsiio);
 		} else if (beio->bio_cmd == BIO_FLUSH) {
 			/* XXX KDM is there a better error here? */
 			ctl_set_internal_failure(&io->scsiio,
 						 /*sks_valid*/ 1,
 						 /*retry_count*/ 0xbad2);
 		} else {
 			ctl_set_medium_error(&io->scsiio,
 			    beio->bio_cmd == BIO_READ);
 		}
 		ctl_complete_beio(beio);
 		return;
 	}
 
 	/*
 	 * If this is a write, a flush, a delete or verify, we're all done.
 	 * If this is a read, we can now send the data to the user.
 	 */
 	if ((beio->bio_cmd == BIO_WRITE)
 	 || (beio->bio_cmd == BIO_FLUSH)
 	 || (beio->bio_cmd == BIO_DELETE)
 	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
 		ctl_set_success(&io->scsiio);
 		ctl_complete_beio(beio);
 	} else {
 		/*
 		 * A plain read with no continuation gets its success
 		 * status set before the data move is started.
 		 */
 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
 		    beio->beio_cont == NULL) {
 			ctl_set_success(&io->scsiio);
 			ctl_serseq_done(io);
 		}
 #ifdef CTL_TIME_IO
 		getbinuptime(&io->io_hdr.dma_start_bt);
 #endif
 		ctl_datamove(io);
 	}
 }
 
 /*
  * Flush a file-backed LUN by calling VOP_FSYNC() on the backing vnode.
  *
  * The fsync is issued with MNT_NOWAIT when beio->io_arg is non-zero and
  * MNT_WAIT otherwise.  The vnode is locked shared if the mount allows
  * shared writes, exclusive otherwise.  The call is bracketed by a devstat
  * transaction; on failure an internal-failure sense (0xbad1) is set.
  * The beio is always completed before returning.
  */
 static void
 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
 			struct ctl_be_block_io *beio)
 {
 	union ctl_io *io = beio->io;
 	struct mount *mountpoint;
 	int error, lock_flags;
 
 	DPRINTF("entered\n");
 
 	binuptime(&beio->ds_t0);
 	mtx_lock(&be_lun->io_lock);
 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
 
 	if (MNT_SHARED_WRITES(mountpoint) ||
 	    ((mountpoint == NULL) && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
 		lock_flags = LK_SHARED;
 	else
 		lock_flags = LK_EXCLUSIVE;
 	vn_lock(be_lun->vn, lock_flags | LK_RETRY);
 	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
 	    curthread);
 	VOP_UNLOCK(be_lun->vn, 0);
 
 	vn_finished_write(mountpoint);
 
 	mtx_lock(&be_lun->io_lock);
 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
 	    beio->ds_tag_type, beio->ds_trans_type,
 	    /*now*/ NULL, /*then*/&beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	if (error == 0)
 		ctl_set_success(&io->scsiio);
 	else {
 		/* XXX KDM is there a better error here? */
 		ctl_set_internal_failure(&io->scsiio,
 					 /*sks_valid*/ 1,
 					 /*retry_count*/ 0xbad1);
 	}
 
 	ctl_complete_beio(beio);
 }
 
 /*
  * SDT probes of the "cbb" provider, fired at the start and end of the
  * read and write paths in the file and zvol dispatch routines.
  */
-SDT_PROBE_DEFINE1(cbb, kernel, read, file_start, "uint64_t");
-SDT_PROBE_DEFINE1(cbb, kernel, write, file_start, "uint64_t");
-SDT_PROBE_DEFINE1(cbb, kernel, read, file_done,"uint64_t");
-SDT_PROBE_DEFINE1(cbb, kernel, write, file_done, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , read, file_done,"uint64_t");
+SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
 
 /*
  * Perform a read or write against a file-backed LUN.
  *
  * Builds a kernel-space uio over the beio's S/G segments and calls
  * VOP_READ() or VOP_WRITE() on the backing vnode with the LUN's file
  * credentials.  CTL_LLF_DPO maps to IO_DIRECT; CTL_LLF_FUA on a write maps
  * to IO_SYNC.  A short read with no error has the unread tail of the
  * buffers zeroed.  On error, sense data is set and the beio completed;
  * on success, writes and verifies complete immediately while reads start
  * the data move to the initiator.
  */
 static void
 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
 			   struct ctl_be_block_io *beio)
 {
 	struct ctl_be_block_filedata *file_data;
 	union ctl_io *io;
 	struct uio xuio;
 	struct iovec *xiovec;
 	size_t s;
 	int error, flags, i;
 
 	DPRINTF("entered\n");
 
 	file_data = &be_lun->backend.file;
 	io = beio->io;
 	flags = 0;
 	if (ARGS(io)->flags & CTL_LLF_DPO)
 		flags |= IO_DIRECT;
 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
 		flags |= IO_SYNC;
 
 	bzero(&xuio, sizeof(xuio));
 	if (beio->bio_cmd == BIO_READ) {
-		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , read, file_start);
 		xuio.uio_rw = UIO_READ;
 	} else {
-		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , write, file_start);
 		xuio.uio_rw = UIO_WRITE;
 	}
 	xuio.uio_offset = beio->io_offset;
 	xuio.uio_resid = beio->io_len;
 	xuio.uio_segflg = UIO_SYSSPACE;
 	xuio.uio_iov = beio->xiovecs;
 	xuio.uio_iovcnt = beio->num_segs;
 	xuio.uio_td = curthread;
 
 	/* Mirror the S/G list into the iovec array used by the uio. */
 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
 		xiovec->iov_base = beio->sg_segs[i].addr;
 		xiovec->iov_len = beio->sg_segs[i].len;
 	}
 
 	binuptime(&beio->ds_t0);
 	mtx_lock(&be_lun->io_lock);
 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	if (beio->bio_cmd == BIO_READ) {
 		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
 
 		/*
 		 * UFS pays attention to IO_DIRECT for reads.  If the
 		 * DIRECTIO option is configured into the kernel, it calls
 		 * ffs_rawread().  But that only works for single-segment
 		 * uios with user space addresses.  In our case, with a
 		 * kernel uio, it still reads into the buffer cache, but it
 		 * will just try to release the buffer from the cache later
 		 * on in ffs_read().
 		 *
 		 * ZFS does not pay attention to IO_DIRECT for reads.
 		 *
 		 * UFS does not pay attention to IO_SYNC for reads.
 		 *
 		 * ZFS pays attention to IO_SYNC (which translates into the
 		 * Solaris define FRSYNC for zfs_read()) for reads.  It
 		 * attempts to sync the file before reading.
 		 */
 		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
 
 		VOP_UNLOCK(be_lun->vn, 0);
-		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , read, file_done);
 		if (error == 0 && xuio.uio_resid > 0) {
 			/*
 			 * If we read less than requested (EOF), then
 			 * we should clear the rest of the buffer.
 			 */
 			/* s = number of bytes actually read. */
 			s = beio->io_len - xuio.uio_resid;
 			for (i = 0; i < beio->num_segs; i++) {
 				/* Skip segments that were filled completely. */
 				if (s >= beio->sg_segs[i].len) {
 					s -= beio->sg_segs[i].len;
 					continue;
 				}
 				bzero((uint8_t *)beio->sg_segs[i].addr + s,
 				    beio->sg_segs[i].len - s);
 				s = 0;
 			}
 		}
 	} else {
 		struct mount *mountpoint;
 		int lock_flags;
 
 		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
 
 		if (MNT_SHARED_WRITES(mountpoint) || ((mountpoint == NULL)
 		  && MNT_SHARED_WRITES(be_lun->vn->v_mount)))
 			lock_flags = LK_SHARED;
 		else
 			lock_flags = LK_EXCLUSIVE;
 		vn_lock(be_lun->vn, lock_flags | LK_RETRY);
 
 		/*
 		 * UFS pays attention to IO_DIRECT for writes.  The write
 		 * is done asynchronously.  (Normally the write would just
 		 * get put into cache.)
 		 *
 		 * UFS pays attention to IO_SYNC for writes.  It will
 		 * attempt to write the buffer out synchronously if that
 		 * flag is set.
 		 *
 		 * ZFS does not pay attention to IO_DIRECT for writes.
 		 *
 		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
 		 * for writes.  It will flush the transaction from the
 		 * cache before returning.
 		 */
 		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
 		VOP_UNLOCK(be_lun->vn, 0);
 
 		vn_finished_write(mountpoint);
-		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , write, file_done);
         }
 
 	mtx_lock(&be_lun->io_lock);
 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
 	    beio->ds_tag_type, beio->ds_trans_type,
 	    /*now*/ NULL, /*then*/&beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	/*
 	 * If we got an error, set the sense data (space allocation
 	 * failure, write protected, or otherwise "MEDIUM ERROR") and
 	 * return the I/O to the user.
 	 */
 	if (error != 0) {
 		if (error == ENOSPC || error == EDQUOT) {
 			ctl_set_space_alloc_fail(&io->scsiio);
 		} else if (error == EROFS || error == EACCES) {
 			ctl_set_hw_write_protected(&io->scsiio);
 		} else {
 			ctl_set_medium_error(&io->scsiio,
 			    beio->bio_cmd == BIO_READ);
 		}
 		ctl_complete_beio(beio);
 		return;
 	}
 
 	/*
 	 * If this is a write or a verify, we're all done.
 	 * If this is a read, we can now send the data to the user.
 	 */
 	if ((beio->bio_cmd == BIO_WRITE) ||
 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
 		ctl_set_success(&io->scsiio);
 		ctl_complete_beio(beio);
 	} else {
 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
 		    beio->beio_cont == NULL) {
 			ctl_set_success(&io->scsiio);
 			ctl_serseq_done(io);
 		}
 #ifdef CTL_TIME_IO
 		getbinuptime(&io->io_hdr.dma_start_bt);
 #endif
 		ctl_datamove(io);
 	}
 }
 
 /*
  * GET LBA STATUS for a file-backed LUN.
  *
  * Starting at the requested LBA, FIOSEEKHOLE and then FIOSEEKDATA are
  * tried on the backing vnode to find where the current extent ends.  One
  * descriptor is written to the I/O's data buffer: the starting LBA, the
  * extent length in blocks (capped at UINT32_MAX), and a status of 0
  * (mapped or unknown) or 1 (deallocated).  If neither ioctl yields an
  * offset beyond the start, the extent is reported as status 0 up to the
  * end of the LUN.  The beio is then completed.
  */
 static void
 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
 			struct ctl_be_block_io *beio)
 {
 	union ctl_io *io = beio->io;
 	struct ctl_lba_len_flags *lbalen = ARGS(io);
 	struct scsi_get_lba_status_data *data;
 	off_t roff, off;
 	int error, status;
 
 	DPRINTF("entered\n");
 
 	/* roff keeps the requested byte offset; off is updated by the ioctls. */
 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
 	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
 	    0, curthread->td_ucred, curthread);
 	if (error == 0 && off > roff)
 		status = 0;	/* mapped up to off */
 	else {
 		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
 		    0, curthread->td_ucred, curthread);
 		if (error == 0 && off > roff)
 			status = 1;	/* deallocated up to off */
 		else {
 			status = 0;	/* unknown up to the end */
 			off = be_lun->size_bytes;
 		}
 	}
 	VOP_UNLOCK(be_lun->vn, 0);
 
 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
 	    lbalen->lba), data->descr[0].length);
 	data->descr[0].status = status;
 
 	ctl_complete_beio(beio);
 }
 
 /*
  * Return a named attribute of a file-backed LUN, in LUN blocks.
  *
  * "blocksused" is the file's va_bytes divided by the LUN block size;
  * "blocksavail" is the filesystem's available space (f_bavail * f_bsize)
  * divided by the LUN block size, and is skipped if the vnode is doomed.
  * Returns UINT64_MAX if there is no backing vnode, the attribute name is
  * not recognized, or the underlying query fails.
  */
 static uint64_t
 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
 {
 	struct vattr		vattr;
 	struct statfs		statfs;
 	uint64_t		val;
 	int			error;
 
 	val = UINT64_MAX;
 	if (be_lun->vn == NULL)
 		return (val);
 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
 	if (strcmp(attrname, "blocksused") == 0) {
 		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
 		if (error == 0)
 			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
 	}
 	if (strcmp(attrname, "blocksavail") == 0 &&
 	    (be_lun->vn->v_iflag & VI_DOOMED) == 0) {
 		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
 		if (error == 0)
 			val = statfs.f_bavail * statfs.f_bsize /
 			    be_lun->cbe_lun.blocksize;
 	}
 	VOP_UNLOCK(be_lun->vn, 0);
 	return (val);
 }
 
 /*
  * Perform a read or write through the backing device's d_read/d_write
  * entry points instead of the bio path.
  *
  * Builds a kernel-space uio over the beio's S/G segments, takes a thread
  * reference on the device with devvn_refthread(), and calls d_read() or
  * d_write().  If no reference can be obtained the I/O fails with ENXIO.
  * CTL_LLF_DPO maps to IO_DIRECT; CTL_LLF_FUA on a write maps to IO_SYNC.
  * Error and completion handling mirror the file dispatch path.
  */
 static void
 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
 			   struct ctl_be_block_io *beio)
 {
 	union ctl_io *io;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	struct uio xuio;
 	struct iovec *xiovec;
 	int error, flags, i, ref;
 
 	DPRINTF("entered\n");
 
 	io = beio->io;
 	flags = 0;
 	if (ARGS(io)->flags & CTL_LLF_DPO)
 		flags |= IO_DIRECT;
 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
 		flags |= IO_SYNC;
 
 	bzero(&xuio, sizeof(xuio));
 	if (beio->bio_cmd == BIO_READ) {
-		SDT_PROBE(cbb, kernel, read, file_start, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , read, file_start);
 		xuio.uio_rw = UIO_READ;
 	} else {
-		SDT_PROBE(cbb, kernel, write, file_start, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , write, file_start);
 		xuio.uio_rw = UIO_WRITE;
 	}
 	xuio.uio_offset = beio->io_offset;
 	xuio.uio_resid = beio->io_len;
 	xuio.uio_segflg = UIO_SYSSPACE;
 	xuio.uio_iov = beio->xiovecs;
 	xuio.uio_iovcnt = beio->num_segs;
 	xuio.uio_td = curthread;
 
 	/* Mirror the S/G list into the iovec array used by the uio. */
 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
 		xiovec->iov_base = beio->sg_segs[i].addr;
 		xiovec->iov_len = beio->sg_segs[i].len;
 	}
 
 	binuptime(&beio->ds_t0);
 	mtx_lock(&be_lun->io_lock);
 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 	if (csw) {
 		if (beio->bio_cmd == BIO_READ)
 			error = csw->d_read(dev, &xuio, flags);
 		else
 			error = csw->d_write(dev, &xuio, flags);
 		dev_relthread(dev, ref);
 	} else
 		error = ENXIO;
 
 	if (beio->bio_cmd == BIO_READ)
-		SDT_PROBE(cbb, kernel, read, file_done, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , read, file_done);
 	else
-		SDT_PROBE(cbb, kernel, write, file_done, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , write, file_done);
 
 	mtx_lock(&be_lun->io_lock);
 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
 	    beio->ds_tag_type, beio->ds_trans_type,
 	    /*now*/ NULL, /*then*/&beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	/*
 	 * If we got an error, set the sense data (space allocation
 	 * failure, write protected, or otherwise "MEDIUM ERROR") and
 	 * return the I/O to the user.
 	 */
 	if (error != 0) {
 		if (error == ENOSPC || error == EDQUOT) {
 			ctl_set_space_alloc_fail(&io->scsiio);
 		} else if (error == EROFS || error == EACCES) {
 			ctl_set_hw_write_protected(&io->scsiio);
 		} else {
 			ctl_set_medium_error(&io->scsiio,
 			    beio->bio_cmd == BIO_READ);
 		}
 		ctl_complete_beio(beio);
 		return;
 	}
 
 	/*
 	 * If this is a write or a verify, we're all done.
 	 * If this is a read, we can now send the data to the user.
 	 */
 	if ((beio->bio_cmd == BIO_WRITE) ||
 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
 		ctl_set_success(&io->scsiio);
 		ctl_complete_beio(beio);
 	} else {
 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
 		    beio->beio_cont == NULL) {
 			ctl_set_success(&io->scsiio);
 			ctl_serseq_done(io);
 		}
 #ifdef CTL_TIME_IO
 		getbinuptime(&io->io_hdr.dma_start_bt);
 #endif
 		ctl_datamove(io);
 	}
 }
 
 /*
  * GET LBA STATUS through the backing device's d_ioctl entry point.
  *
  * Same reporting as the file variant, but FIOSEEKHOLE/FIOSEEKDATA are
  * issued via d_ioctl() under a devvn_refthread() reference.  If no
  * reference can be obtained, the range up to the end of the LUN is
  * reported with status 0.  The beio is completed before returning.
  */
 static void
 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
 			struct ctl_be_block_io *beio)
 {
 	union ctl_io *io = beio->io;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	struct ctl_lba_len_flags *lbalen = ARGS(io);
 	struct scsi_get_lba_status_data *data;
 	off_t roff, off;
 	int error, ref, status;
 
 	DPRINTF("entered\n");
 
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 	if (csw == NULL) {
 		status = 0;	/* unknown up to the end */
 		off = be_lun->size_bytes;
 		goto done;
 	}
 	/* roff keeps the requested byte offset; off is updated by the ioctls. */
 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
 	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
 	    curthread);
 	if (error == 0 && off > roff)
 		status = 0;	/* mapped up to off */
 	else {
 		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
 		    curthread);
 		if (error == 0 && off > roff)
 			status = 1;	/* deallocated up to off */
 		else {
 			status = 0;	/* unknown up to the end */
 			off = be_lun->size_bytes;
 		}
 	}
 	dev_relthread(dev, ref);
 
 done:
 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
 	    lbalen->lba), data->descr[0].length);
 	data->descr[0].status = status;
 
 	ctl_complete_beio(beio);
 }
 
 /*
  * Flush a device-backed LUN by sending a single BIO_FLUSH bio.
  *
  * Completion is handled in ctl_be_block_biodone().  If a thread reference
  * on the device cannot be obtained, the bio is failed locally with ENXIO
  * by calling the completion handler directly.
  */
 static void
 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
 		       struct ctl_be_block_io *beio)
 {
 	struct bio *bio;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	int ref;
 
 	DPRINTF("entered\n");
 
 	/* This can't fail, it's a blocking allocation. */
 	bio = g_alloc_bio();
 
 	bio->bio_cmd	    = BIO_FLUSH;
 	bio->bio_offset	    = 0;
 	bio->bio_data	    = 0;
 	bio->bio_done	    = ctl_be_block_biodone;
 	bio->bio_caller1    = beio;
 	bio->bio_pblkno	    = 0;
 
 	/*
 	 * We don't need to acquire the LUN lock here, because we are only
 	 * sending one bio, and so there is no other context to synchronize
 	 * with.
 	 */
 	beio->num_bios_sent = 1;
 	beio->send_complete = 1;
 
 	binuptime(&beio->ds_t0);
 	mtx_lock(&be_lun->io_lock);
 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 	if (csw) {
 		bio->bio_dev = dev;
 		csw->d_strategy(bio);
 		dev_relthread(dev, ref);
 	} else {
 		bio->bio_error = ENXIO;
 		ctl_be_block_biodone(bio);
 	}
 }
 
 /*
  * Issue BIO_DELETE bios covering the byte range [off, off + len).
  *
  * The range is split into bios of at most the largest block-size multiple
  * that fits in LONG_MAX.  Each bio bumps num_bios_sent under io_lock; when
  * 'last' is set, the bio that finishes the range also sets send_complete
  * so the completion handler knows no more bios will follow.  If a thread
  * reference on the device cannot be obtained, each bio is failed locally
  * with ENXIO.
  *
  * NOTE(review): if len is 0 on entry the loop body never runs, so
  * send_complete is not set here even when 'last' is true -- presumably
  * callers never pass an empty final range; verify against the caller.
  */
 static void
 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
 		       struct ctl_be_block_io *beio,
 		       uint64_t off, uint64_t len, int last)
 {
 	struct bio *bio;
 	uint64_t maxlen;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	int ref;
 
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
 	while (len > 0) {
 		bio = g_alloc_bio();
 		bio->bio_cmd	    = BIO_DELETE;
 		/*
 		 * NOTE(review): dev is stored even when csw is NULL;
 		 * whether devvn_refthread() initializes it on failure is
 		 * not visible here -- confirm.
 		 */
 		bio->bio_dev	    = dev;
 		bio->bio_offset	    = off;
 		bio->bio_length	    = MIN(len, maxlen);
 		bio->bio_data	    = 0;
 		bio->bio_done	    = ctl_be_block_biodone;
 		bio->bio_caller1    = beio;
 		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
 
 		off += bio->bio_length;
 		len -= bio->bio_length;
 
 		mtx_lock(&be_lun->io_lock);
 		beio->num_bios_sent++;
 		if (last && len == 0)
 			beio->send_complete = 1;
 		mtx_unlock(&be_lun->io_lock);
 
 		if (csw) {
 			csw->d_strategy(bio);
 		} else {
 			bio->bio_error = ENXIO;
 			ctl_be_block_biodone(bio);
 		}
 	}
 	if (csw)
 		dev_relthread(dev, ref);
 }
 
 /*
  * Start an unmap (delete) operation on a device-backed LUN.
  *
  * An io_offset of -1 means the ranges come from the list of UNMAP
  * descriptors referenced by the I/O's private LBA/len area; each descriptor
  * is converted to a byte range and io_len accumulates the total.  Any other
  * io_offset describes a single range [io_offset, io_offset + io_len).
  */
 static void
 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
 		       struct ctl_be_block_io *beio)
 {
 	struct ctl_ptr_len_flags *args;
 	struct scsi_unmap_desc *desc, *desc_end;
 	union ctl_io *io;
 	uint64_t nbytes;
 	int is_last;
 
 	io = beio->io;
 
 	DPRINTF("entered\n");
 
 	binuptime(&beio->ds_t0);
 	mtx_lock(&be_lun->io_lock);
 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
 	mtx_unlock(&be_lun->io_lock);
 
 	/* Single contiguous range: nothing to parse. */
 	if (beio->io_offset != -1) {
 		ctl_be_block_unmap_dev_range(be_lun, beio,
 		    beio->io_offset, beio->io_len, TRUE);
 		return;
 	}
 
 	/* Descriptor list: one range per descriptor. */
 	beio->io_len = 0;
 	args = (struct ctl_ptr_len_flags *)
 	    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
 	desc = (struct scsi_unmap_desc *)args->ptr;
 	desc_end = desc + args->len / sizeof(*desc);
 	while (desc < desc_end) {
 		nbytes = (uint64_t)scsi_4btoul(desc->length) *
 		    be_lun->cbe_lun.blocksize;
 		beio->io_len += nbytes;
 		/* The final descriptor is the one with no successor. */
 		if (desc_end - desc < 2)
 			is_last = TRUE;
 		else
 			is_last = FALSE;
 		ctl_be_block_unmap_dev_range(be_lun, beio,
 		    scsi_8btou64(desc->lba) * be_lun->cbe_lun.blocksize,
 		    nbytes, is_last);
 		desc++;
 	}
 }
 
 static void
 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
 			  struct ctl_be_block_io *beio)
 {
 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
 	struct bio *bio;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	off_t cur_offset;
 	int i, max_iosize, ref;
 
 	DPRINTF("entered\n");
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 
 	/*
 	 * We have to limit our I/O size to the maximum supported by the
 	 * backend device.  Hopefully it is MAXPHYS.  If the driver doesn't
 	 * set it properly, use DFLTPHYS.
 	 */
 	if (csw) {
 		max_iosize = dev->si_iosize_max;
 		if (max_iosize < PAGE_SIZE)
 			max_iosize = DFLTPHYS;
 	} else
 		max_iosize = DFLTPHYS;
 
 	cur_offset = beio->io_offset;
 	for (i = 0; i < beio->num_segs; i++) {
 		size_t cur_size;
 		uint8_t *cur_ptr;
 
 		cur_size = beio->sg_segs[i].len;
 		cur_ptr = beio->sg_segs[i].addr;
 
 		while (cur_size > 0) {
 			/* This can't fail, it's a blocking allocation. */
 			bio = g_alloc_bio();
 
 			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
 
 			bio->bio_cmd = beio->bio_cmd;
 			bio->bio_dev = dev;
 			bio->bio_caller1 = beio;
 			bio->bio_length = min(cur_size, max_iosize);
 			bio->bio_offset = cur_offset;
 			bio->bio_data = cur_ptr;
 			bio->bio_done = ctl_be_block_biodone;
 			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
 
 			cur_offset += bio->bio_length;
 			cur_ptr += bio->bio_length;
 			cur_size -= bio->bio_length;
 
 			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
 			beio->num_bios_sent++;
 		}
 	}
 	binuptime(&beio->ds_t0);
 	mtx_lock(&be_lun->io_lock);
 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
 	beio->send_complete = 1;
 	mtx_unlock(&be_lun->io_lock);
 
 	/*
 	 * Fire off all allocated requests!
 	 */
 	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
 		TAILQ_REMOVE(&queue, bio, bio_queue);
 		if (csw)
 			csw->d_strategy(bio);
 		else {
 			bio->bio_error = ENXIO;
 			ctl_be_block_biodone(bio);
 		}
 	}
 	if (csw)
 		dev_relthread(dev, ref);
 }
 
 static uint64_t
 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
 {
 	struct diocgattr_arg	arg;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	int error, ref;
 
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 	if (csw == NULL)
 		return (UINT64_MAX);
 	strlcpy(arg.name, attrname, sizeof(arg.name));
 	arg.len = sizeof(arg.value.off);
 	if (csw->d_ioctl) {
 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
 		    curthread);
 	} else
 		error = ENODEV;
 	dev_relthread(dev, ref);
 	if (error != 0)
 		return (UINT64_MAX);
 	return (arg.value.off);
 }
 
 static void
 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
 			    union ctl_io *io)
 {
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	struct ctl_be_block_io *beio;
 	struct ctl_lba_len_flags *lbalen;
 
 	DPRINTF("entered\n");
 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
 
 	beio->io_len = lbalen->len * cbe_lun->blocksize;
 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
 	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
 	beio->bio_cmd = BIO_FLUSH;
 	beio->ds_trans_type = DEVSTAT_NO_DATA;
 	DPRINTF("SYNC\n");
 	be_lun->lun_flush(be_lun, beio);
 }
 
 /*
  * Continuation run after one pass of a multi-pass WRITE SAME.
  *
  * Releases the finished backend I/O.  If the command was aborted or has
  * already picked up a non-success status it is completed; otherwise it is
  * fed back into ctl_be_block_config_write() for the next pass.
  */
 static void
 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
 {
 	union ctl_io *cio;
 	int aborted, failed;
 
 	cio = beio->io;
 	ctl_free_beio(beio);
 
 	aborted = (cio->io_hdr.flags & CTL_FLAG_ABORT) != 0;
 	failed = (cio->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
 	    (cio->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS;
 
 	if (!aborted && !failed) {
 		ctl_be_block_config_write(cio);
 		return;
 	}
 
 	ctl_config_write_done(cio);
 }
 
 /*
  * Handle WRITE SAME.
  *
  * Unsupported flag combinations are rejected with an invalid-field error.
  * When SWS_UNMAP or SWS_ANCHOR is set the request is turned into a delete
  * and passed to the LUN's unmap method.  Otherwise up to CTLBLK_MAX_SEGS
  * buffers are filled with the pattern block and written out; if that does
  * not cover the whole request, the LBA/len arguments are advanced and
  * ctl_be_block_cw_done_ws() is installed as continuation to schedule the
  * next pass.
  */
 static void
 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
 			    union ctl_io *io)
 {
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	struct ctl_be_block_io *beio;
 	struct ctl_lba_len_flags *lbalen;
 	uint64_t len_left, lba;
 	uint32_t pb, pbo, adj;
 	int i, seglen;
 	uint8_t *buf, *end;
 
 	DPRINTF("entered\n");
 
 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 	lbalen = ARGS(beio->io);
 
 	/*
 	 * Reject flags we do not know, and unmap/anchor requests on a LUN
 	 * that has no unmap method.
 	 */
 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
 	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
 		ctl_free_beio(beio);
 		ctl_set_invalid_field(&io->scsiio,
 				      /*sks_valid*/ 1,
 				      /*command*/ 1,
 				      /*field*/ 1,
 				      /*bit_valid*/ 0,
 				      /*bit*/ 0);
 		ctl_config_write_done(io);
 		return;
 	}
 
 	/* Unmap/anchor variant: a single delete over the whole range. */
 	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
 		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
 		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
 		beio->bio_cmd = BIO_DELETE;
 		beio->ds_trans_type = DEVSTAT_FREE;
 
 		be_lun->unmap(be_lun, beio);
 		return;
 	}
 
 	beio->bio_cmd = BIO_WRITE;
 	beio->ds_trans_type = DEVSTAT_WRITE;
 
 	DPRINTF("WRITE SAME at LBA %jx len %u\n",
 	       (uintmax_t)lbalen->lba, lbalen->len);
 
 	/*
 	 * pb is the physical block size in bytes and pbo a byte offset
 	 * derived from pblockoff; both feed the segment trimming below.
 	 */
 	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
 	if (be_lun->cbe_lun.pblockoff > 0)
 		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
 	else
 		pbo = 0;
 	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
 	/* lba counts the blocks filled so far in this pass. */
 	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
 
 		/*
 		 * Setup the S/G entry for this chunk.
 		 */
 		seglen = MIN(CTLBLK_MAX_SEG, len_left);
 		if (pb > cbe_lun->blocksize) {
 			/*
 			 * adj is how far the end of this segment lies past
 			 * a pb-sized boundary (shifted by pbo); trimming it
 			 * appears intended to end the segment on a physical
 			 * block boundary when the segment is long enough.
 			 */
 			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
 			    seglen - pbo) % pb;
 			if (seglen > adj)
 				seglen -= adj;
 			else
 				seglen -= seglen % cbe_lun->blocksize;
 		} else
 			seglen -= seglen % cbe_lun->blocksize;
 		beio->sg_segs[i].len = seglen;
 		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
 
 		DPRINTF("segment %d addr %p len %zd\n", i,
 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
 
 		beio->num_segs++;
 		len_left -= seglen;
 
 		/*
 		 * Fill the segment one logical block at a time: zeroes for
 		 * SWS_NDOB, otherwise a copy of the block supplied with the
 		 * command.  With SWS_LBDATA the first four bytes of each
 		 * block are overwritten with its LBA.
 		 */
 		buf = beio->sg_segs[i].addr;
 		end = buf + seglen;
 		for (; buf < end; buf += cbe_lun->blocksize) {
 			if (lbalen->flags & SWS_NDOB) {
 				memset(buf, 0, cbe_lun->blocksize);
 			} else {
 				memcpy(buf, io->scsiio.kern_data_ptr,
 				    cbe_lun->blocksize);
 			}
 			if (lbalen->flags & SWS_LBDATA)
 				scsi_ulto4b(lbalen->lba + lba, buf);
 			lba++;
 		}
 	}
 
 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
 	beio->io_len = lba * cbe_lun->blocksize;
 
 	/* We can not do all in one run. Correct and schedule rerun. */
 	if (len_left > 0) {
 		lbalen->lba += lba;
 		lbalen->len -= lba;
 		beio->beio_cont = ctl_be_block_cw_done_ws;
 	}
 
 	be_lun->dispatch(be_lun, beio);
 }
 
 /*
  * Handle UNMAP.
  *
  * The request is passed to the LUN's unmap method with io_offset set to -1,
  * which tells that method to take its ranges from the descriptor list.
  * Requests carrying flags other than SU_ANCHOR, or aimed at a LUN without
  * an unmap method, are completed with an invalid-field error.
  */
 static void
 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
 			    union ctl_io *io)
 {
 	struct ctl_ptr_len_flags *args;
 	struct ctl_be_block_io *bio_ctx;
 
 	DPRINTF("entered\n");
 
 	bio_ctx = (struct ctl_be_block_io *)PRIV(io)->ptr;
 	args = (struct ctl_ptr_len_flags *)
 	    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
 
 	if ((args->flags & ~SU_ANCHOR) == 0 && be_lun->unmap != NULL) {
 		bio_ctx->bio_cmd = BIO_DELETE;
 		bio_ctx->ds_trans_type = DEVSTAT_FREE;
 		bio_ctx->io_offset = -1;
 		bio_ctx->io_len = 0;
 		DPRINTF("UNMAP\n");
 		be_lun->unmap(be_lun, bio_ctx);
 		return;
 	}
 
 	/* Unsupported request: release the beio and report the error. */
 	ctl_free_beio(bio_ctx);
 	ctl_set_invalid_field(&io->scsiio,
 			      /*sks_valid*/ 0,
 			      /*command*/ 1,
 			      /*field*/ 0,
 			      /*bit_valid*/ 0,
 			      /*bit*/ 0);
 	ctl_config_write_done(io);
 }
 
 /*
  * Completion continuation for configuration-read commands: release the
  * backend I/O, then report the CTL I/O as done.
  */
 static void
 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
 {
 	union ctl_io *owner = beio->io;
 
 	/* Grab the owner first; beio must not be touched once freed. */
 	ctl_free_beio(beio);
 	ctl_config_read_done(owner);
 }
 
 /*
  * Allocate a backend I/O for a configuration-read command and dispatch it.
  *
  * The only opcode handled is SERVICE ACTION IN (GET LBA STATUS), which goes
  * to the LUN's get_lba_status method when there is one and is otherwise
  * completed immediately.  Any other opcode panics.
  */
 static void
 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
 			 union ctl_io *io)
 {
 	struct ctl_be_block_io *req;
 
 	DPRINTF("entered\n");
 
 	req = ctl_alloc_beio(be_lun->softc);
 	req->io = io;
 	req->lun = be_lun;
 	req->beio_cont = ctl_be_block_cr_done;
 	PRIV(io)->ptr = (void *)req;
 
 	if (io->scsiio.cdb[0] != SERVICE_ACTION_IN)
 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
 
 	/* GET LBA STATUS */
 	req->bio_cmd = -1;
 	req->ds_trans_type = DEVSTAT_NO_DATA;
 	req->ds_tag_type = DEVSTAT_TAG_ORDERED;
 	req->io_len = 0;
 	if (be_lun->get_lba_status) {
 		be_lun->get_lba_status(be_lun, req);
 		return;
 	}
 	ctl_be_block_cr_done(req);
 }
 
 /*
  * Completion continuation for configuration-write commands: release the
  * backend I/O, then report the CTL I/O as done.
  */
 static void
 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
 {
 	union ctl_io *owner = beio->io;
 
 	/* Grab the owner first; beio must not be touched once freed. */
 	ctl_free_beio(beio);
 	ctl_config_write_done(owner);
 }
 
 /*
  * Allocate a backend I/O for a configuration-write command and route it to
  * the handler for its opcode: SYNCHRONIZE CACHE, WRITE SAME or UNMAP.
  * Any other opcode panics.
  */
 static void
 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
 			 union ctl_io *io)
 {
 	struct ctl_be_block_io *req;
 
 	DPRINTF("entered\n");
 
 	req = ctl_alloc_beio(be_lun->softc);
 	req->io = io;
 	req->lun = be_lun;
 	req->beio_cont = ctl_be_block_cw_done;
 	PRIV(io)->ptr = (void *)req;
 
 	/*
 	 * Translate the SCSI tag type for devstat.  Everything that is
 	 * neither ordered nor head-of-queue is accounted as simple.
 	 */
 	if (io->scsiio.tag_type == CTL_TAG_ORDERED)
 		req->ds_tag_type = DEVSTAT_TAG_ORDERED;
 	else if (io->scsiio.tag_type == CTL_TAG_HEAD_OF_QUEUE)
 		req->ds_tag_type = DEVSTAT_TAG_HEAD;
 	else
 		req->ds_tag_type = DEVSTAT_TAG_SIMPLE;
 
 	switch (io->scsiio.cdb[0]) {
 	case SYNCHRONIZE_CACHE:
 	case SYNCHRONIZE_CACHE_16:
 		ctl_be_block_cw_dispatch_sync(be_lun, io);
 		return;
 	case WRITE_SAME_10:
 	case WRITE_SAME_16:
 		ctl_be_block_cw_dispatch_ws(be_lun, io);
 		return;
 	case UNMAP:
 		ctl_be_block_cw_dispatch_unmap(be_lun, io);
 		return;
 	default:
 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
 	}
 }
 
 /*
  * SDT probes of the "cbb" provider, fired from ctl_be_block_dispatch() when
  * a read or write starts and when its buffers have been allocated.  This
  * change empties the second (module) field, which used to be "kernel".
  * NOTE(review): the probes are defined with one "uint64_t" argument but are
  * fired without arguments by ctl_be_block_dispatch() -- confirm intended.
  */
-SDT_PROBE_DEFINE1(cbb, kernel, read, start, "uint64_t");
-SDT_PROBE_DEFINE1(cbb, kernel, write, start, "uint64_t");
-SDT_PROBE_DEFINE1(cbb, kernel, read, alloc_done, "uint64_t");
-SDT_PROBE_DEFINE1(cbb, kernel, write, alloc_done, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
+SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
 
 /*
  * Continuation run after one chunk of a multi-chunk read/write.
  *
  * Releases the finished backend I/O.  If the command was aborted or has
  * already picked up a non-success status it is completed; otherwise its
  * status is reset and it is put back on the input queue so the worker
  * dispatches the next chunk.
  */
 static void
 ctl_be_block_next(struct ctl_be_block_io *beio)
 {
 	struct ctl_be_block_lun *lun;
 	union ctl_io *cio;
 	int aborted, failed;
 
 	cio = beio->io;
 	lun = beio->lun;
 	ctl_free_beio(beio);
 
 	aborted = (cio->io_hdr.flags & CTL_FLAG_ABORT) != 0;
 	failed = (cio->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
 	    (cio->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS;
 	if (aborted || failed) {
 		ctl_data_submit_done(cio);
 		return;
 	}
 
 	/* Clear the status left by the finished chunk. */
 	cio->io_hdr.status &= ~CTL_STATUS_MASK;
 	cio->io_hdr.status |= CTL_STATUS_NONE;
 
 	mtx_lock(&lun->queue_lock);
 	STAILQ_INSERT_TAIL(&lun->input_queue, &cio->io_hdr, links);
 	mtx_unlock(&lun->queue_lock);
 	taskqueue_enqueue(lun->io_taskqueue, &lun->io_task);
 }
 
 /*
  * Dispatch one chunk of a read, write or compare command.
  *
  * A backend I/O is allocated and sized to at most CTLBLK_MAX_IO_SIZE bytes
  * (CTLBLK_HALF_IO_SIZE for compare), starting at the block offset already
  * processed, which is tracked in PRIV(io)->len.  Buffers are allocated for
  * each S/G segment.  Reads go straight to the LUN's dispatch method; for
  * writes the data is first requested from the initiator via ctl_datamove().
  * If blocks remain after this chunk, ctl_be_block_next() is installed as
  * continuation.
  */
 static void
 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
 			   union ctl_io *io)
 {
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	struct ctl_be_block_io *beio;
 	struct ctl_be_block_softc *softc;
 	struct ctl_lba_len_flags *lbalen;
 	struct ctl_ptr_len_flags *bptrlen;
 	uint64_t len_left, lbas;
 	int i;
 
 	softc = be_lun->softc;
 
 	DPRINTF("entered\n");
 
 	lbalen = ARGS(io);
 	if (lbalen->flags & CTL_LLF_WRITE) {
-		SDT_PROBE(cbb, kernel, write, start, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , write, start);
 	} else {
-		SDT_PROBE(cbb, kernel, read, start, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , read, start);
 	}
 
 	beio = ctl_alloc_beio(softc);
 	beio->io = io;
 	beio->lun = be_lun;
 	/* bptrlen->ptr holds the beio, bptrlen->len the blocks done so far. */
 	bptrlen = PRIV(io);
 	bptrlen->ptr = (void *)beio;
 
 	/* Translate the SCSI tag type for devstat accounting. */
 	switch (io->scsiio.tag_type) {
 	case CTL_TAG_ORDERED:
 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
 		break;
 	case CTL_TAG_HEAD_OF_QUEUE:
 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
 		break;
 	case CTL_TAG_UNTAGGED:
 	case CTL_TAG_SIMPLE:
 	case CTL_TAG_ACA:
 	default:
 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
 		break;
 	}
 
 	if (lbalen->flags & CTL_LLF_WRITE) {
 		beio->bio_cmd = BIO_WRITE;
 		beio->ds_trans_type = DEVSTAT_WRITE;
 	} else {
 		beio->bio_cmd = BIO_READ;
 		beio->ds_trans_type = DEVSTAT_READ;
 	}
 
 	DPRINTF("%s at LBA %jx len %u @%ju\n",
 	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
 	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
 	/*
 	 * Size this chunk: the remaining blocks, capped by the per-I/O
 	 * byte limit.  Compare gets half the limit because it uses two
 	 * sets of buffers (see below).
 	 */
 	if (lbalen->flags & CTL_LLF_COMPARE)
 		lbas = CTLBLK_HALF_IO_SIZE;
 	else
 		lbas = CTLBLK_MAX_IO_SIZE;
 	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
 	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
 	beio->io_len = lbas * cbe_lun->blocksize;
 	bptrlen->len += lbas;
 
 	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
 		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
 		    i, CTLBLK_MAX_SEGS));
 
 		/*
 		 * Setup the S/G entry for this chunk.
 		 */
 		beio->sg_segs[i].len = min(CTLBLK_MAX_SEG, len_left);
 		beio->sg_segs[i].addr = uma_zalloc(be_lun->lun_zone, M_WAITOK);
 
 		DPRINTF("segment %d addr %p len %zd\n", i,
 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
 
 		/* Set up second segment for compare operation. */
 		if (lbalen->flags & CTL_LLF_COMPARE) {
 			beio->sg_segs[i + CTLBLK_HALF_SEGS].len =
 			    beio->sg_segs[i].len;
 			beio->sg_segs[i + CTLBLK_HALF_SEGS].addr =
 			    uma_zalloc(be_lun->lun_zone, M_WAITOK);
 		}
 
 		beio->num_segs++;
 		len_left -= beio->sg_segs[i].len;
 	}
 	/* More blocks to go after this chunk: arrange to be called again. */
 	if (bptrlen->len < lbalen->len)
 		beio->beio_cont = ctl_be_block_next;
 	io->scsiio.be_move_done = ctl_be_block_move_done;
 	/* For compare we have separate S/G lists for read and datamove. */
 	if (lbalen->flags & CTL_LLF_COMPARE)
 		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
 	else
 		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
 	io->scsiio.kern_data_len = beio->io_len;
 	io->scsiio.kern_data_resid = 0;
 	io->scsiio.kern_sg_entries = beio->num_segs;
 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
 
 	/*
 	 * For the read case, we need to read the data into our buffers and
 	 * then we can send it back to the user.  For the write case, we
 	 * need to get the data from the user first.
 	 */
 	if (beio->bio_cmd == BIO_READ) {
-		SDT_PROBE(cbb, kernel, read, alloc_done, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , read, alloc_done);
 		be_lun->dispatch(be_lun, beio);
 	} else {
-		SDT_PROBE(cbb, kernel, write, alloc_done, 0, 0, 0, 0, 0);
+		SDT_PROBE0(cbb, , write, alloc_done);
 #ifdef CTL_TIME_IO
 		getbinuptime(&io->io_hdr.dma_start_bt);
 #endif
 		ctl_datamove(io);
 	}
 }
 
 /*
  * Task queue handler that drains the LUN's I/O queues.
  *
  * context - the struct ctl_be_block_lun whose queues are serviced.
  * pending - task queue pending count; not used.
  *
  * On every iteration the queues are checked in fixed priority order --
  * datamove, config write, config read, input -- and the first I/O found is
  * removed under queue_lock and dispatched with the lock dropped.  The
  * function returns once all four queues are empty.
  */
 static void
 ctl_be_block_worker(void *context, int pending)
 {
 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	union ctl_io *io;
 	struct ctl_be_block_io *beio;
 
 	DPRINTF("entered\n");
 	/*
 	 * Fetch and process I/Os from all queues.  If we detect LUN
 	 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
 	 * so make response maximally opaque to not confuse initiator.
 	 *
 	 * After failing such an I/O we keep looping rather than returning:
 	 * nothing guarantees another enqueue will run this task again, so
 	 * leaving early could strand whatever is still sitting in the
 	 * queues.
 	 */
 	for (;;) {
 		mtx_lock(&be_lun->queue_lock);
 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
 		if (io != NULL) {
 			DPRINTF("datamove queue\n");
 			STAILQ_REMOVE(&be_lun->datamove_queue, &io->io_hdr,
 				      ctl_io_hdr, links);
 			mtx_unlock(&be_lun->queue_lock);
 			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 				ctl_set_busy(&io->scsiio);
 				ctl_complete_beio(beio);
 				continue;
 			}
 			be_lun->dispatch(be_lun, beio);
 			continue;
 		}
 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
 		if (io != NULL) {
 			DPRINTF("config write queue\n");
 			STAILQ_REMOVE(&be_lun->config_write_queue, &io->io_hdr,
 				      ctl_io_hdr, links);
 			mtx_unlock(&be_lun->queue_lock);
 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 				ctl_set_busy(&io->scsiio);
 				ctl_config_write_done(io);
 				continue;
 			}
 			ctl_be_block_cw_dispatch(be_lun, io);
 			continue;
 		}
 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
 		if (io != NULL) {
 			DPRINTF("config read queue\n");
 			STAILQ_REMOVE(&be_lun->config_read_queue, &io->io_hdr,
 				      ctl_io_hdr, links);
 			mtx_unlock(&be_lun->queue_lock);
 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 				ctl_set_busy(&io->scsiio);
 				ctl_config_read_done(io);
 				continue;
 			}
 			ctl_be_block_cr_dispatch(be_lun, io);
 			continue;
 		}
 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
 		if (io != NULL) {
 			DPRINTF("input queue\n");
 			STAILQ_REMOVE(&be_lun->input_queue, &io->io_hdr,
 				      ctl_io_hdr, links);
 			mtx_unlock(&be_lun->queue_lock);
 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
 				ctl_set_busy(&io->scsiio);
 				ctl_data_submit_done(io);
 				continue;
 			}
 			ctl_be_block_dispatch(be_lun, io);
 			continue;
 		}
 
 		/*
 		 * If we get here, there is no work left in the queues, so
 		 * just break out and let the task queue go to sleep.
 		 */
 		mtx_unlock(&be_lun->queue_lock);
 		break;
 	}
 }
 
 /*
  * Entry point used by CTL to hand an I/O to this backend.  Nothing is
  * processed here: the I/O is appended to the LUN's input queue and the
  * worker task is kicked to pick it up.  Always returns CTL_RETVAL_COMPLETE.
  */
 static int
 ctl_be_block_submit(union ctl_io *io)
 {
 	struct ctl_be_lun *generic_lun;
 	struct ctl_be_block_lun *lun;
 
 	DPRINTF("entered\n");
 
 	/*
 	 * Make sure we only get SCSI I/O.
 	 */
 	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
 	    ("Non-SCSI I/O (type %#x) encountered", io->io_hdr.io_type));
 
 	/* The backend LUN is reached through the I/O's private area. */
 	generic_lun = (struct ctl_be_lun *)
 	    io->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr;
 	lun = (struct ctl_be_block_lun *)generic_lun->be_lun;
 
 	/* No blocks of this command have been processed yet. */
 	PRIV(io)->len = 0;
 
 	mtx_lock(&lun->queue_lock);
 	STAILQ_INSERT_TAIL(&lun->input_queue, &io->io_hdr, links);
 	mtx_unlock(&lun->queue_lock);
 	taskqueue_enqueue(lun->io_taskqueue, &lun->io_task);
 
 	return (CTL_RETVAL_COMPLETE);
 }
 
 /*
  * ioctl handler of the block backend.
  *
  * Only CTL_LUN_REQ is understood; its create, remove and modify request
  * types are forwarded to the matching routine and that routine's result is
  * returned.  An unknown request type is reported through the request's
  * status and error string while the ioctl itself returns 0.  Any other
  * command yields ENOTTY.
  */
 static int
 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
 			int flag, struct thread *td)
 {
 	struct ctl_be_block_softc *sc = &backend_block_softc;
 	struct ctl_lun_req *req;
 
 	if (cmd != CTL_LUN_REQ)
 		return (ENOTTY);
 
 	req = (struct ctl_lun_req *)addr;
 	switch (req->reqtype) {
 	case CTL_LUNREQ_CREATE:
 		return (ctl_be_block_create(sc, req));
 	case CTL_LUNREQ_RM:
 		return (ctl_be_block_rm(sc, req));
 	case CTL_LUNREQ_MODIFY:
 		return (ctl_be_block_modify(sc, req));
 	default:
 		break;
 	}
 
 	/* Unknown request type: flag it in the request, not in errno. */
 	req->status = CTL_LUN_ERROR;
 	snprintf(req->error_str, sizeof(req->error_str),
 		 "invalid LUN request type %d",
 		 req->reqtype);
 	return (0);
 }
 
 /*
  * Configure a LUN that is backed by a regular file.
  *
  * Installs the file variants of the backend methods (there is no unmap
  * method for files), takes a reference on the calling thread's credentials,
  * and derives the LUN size, logical block size, and physical/unmap block
  * geometry from the file attributes, creation parameters and LUN options.
  *
  * Returns 0 on success, the VOP_GETATTR() error, or EINVAL when the media
  * is smaller than one logical block.  On failure req->error_str is filled
  * in.
  */
 static int
 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
 {
 	struct ctl_be_lun *cbe_lun;
 	struct ctl_be_block_filedata *file_data;
 	struct ctl_lun_create_params *params;
 	char			     *value;
 	struct vattr		      vattr;
 	off_t			      ps, pss, po, pos, us, uss, uo, uos;
 	int			      error;
 
 	cbe_lun = &be_lun->cbe_lun;
 	file_data = &be_lun->backend.file;
 	params = &be_lun->params;
 
 	be_lun->dev_type = CTL_BE_BLOCK_FILE;
 	be_lun->dispatch = ctl_be_block_dispatch_file;
 	be_lun->lun_flush = ctl_be_block_flush_file;
 	be_lun->get_lba_status = ctl_be_block_gls_file;
 	be_lun->getattr = ctl_be_block_getattr_file;
 	be_lun->unmap = NULL;
 	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
 
 	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
 	if (error != 0) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "error calling VOP_GETATTR() for file %s",
 			 be_lun->dev_path);
 		return (error);
 	}
 
 	/* An explicitly requested LUN size overrides the file size. */
 	file_data->cred = crhold(curthread->td_ucred);
 	if (params->lun_size_bytes != 0)
 		be_lun->size_bytes = params->lun_size_bytes;
 	else
 		be_lun->size_bytes = vattr.va_size;
 
 	/*
 	 * For files we can use any logical block size.  Prefer 512 bytes
 	 * for compatibility reasons.  If file's vattr.va_blocksize
 	 * (preferred I/O block size) is bigger and multiple to chosen
 	 * logical block size -- report it as physical block size.
 	 */
 	if (params->blocksize_bytes != 0)
 		cbe_lun->blocksize = params->blocksize_bytes;
 	else if (cbe_lun->lun_type == T_CDROM)
 		cbe_lun->blocksize = 2048;
 	else
 		cbe_lun->blocksize = 512;
 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
 	    0 : (be_lun->size_blocks - 1);
 
 	/* Defaults for both geometries come from the preferred I/O size. */
 	us = ps = vattr.va_blocksize;
 	uo = po = 0;
 
 	/*
 	 * Physical block geometry.  It is only recorded when the size is a
 	 * power-of-two multiple of the logical block size and the offset is
 	 * a whole number of logical blocks not exceeding it.
 	 */
 	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
 	if (value != NULL)
 		ctl_expand_number(value, &ps);
 	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
 	if (value != NULL)
 		ctl_expand_number(value, &po);
 	pss = ps / cbe_lun->blocksize;
 	pos = po / cbe_lun->blocksize;
 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
 		cbe_lun->pblockexp = fls(pss) - 1;
 		cbe_lun->pblockoff = (pss - pos) % pss;
 	}
 
 	/* Unmap block geometry, validated the same way. */
 	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
 	if (value != NULL)
 		ctl_expand_number(value, &us);
 	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
 	if (value != NULL)
 		ctl_expand_number(value, &uo);
 	uss = us / cbe_lun->blocksize;
 	uos = uo / cbe_lun->blocksize;
 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
 		cbe_lun->ublockexp = fls(uss) - 1;
 		cbe_lun->ublockoff = (uss - uos) % uss;
 	}
 
 	/*
 	 * Sanity check.  The media size has to be at least one
 	 * sector long.
 	 */
 	if (be_lun->size_bytes < cbe_lun->blocksize) {
 		error = EINVAL;
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "file %s size %ju < block size %u", be_lun->dev_path,
 			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
 	}
 
 	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
 	return (error);
 }
 
 /*
  * Configure a LUN that is backed by a disk device.
  *
  * Takes a thread reference on the device for the duration of the call,
  * installs the device variants of the backend methods (with dedicated
  * dispatch and GET LBA STATUS handlers when the driver is named "zvol"),
  * and queries the device for sector size, media size, stripe geometry and
  * delete support.  Creation parameters and LUN options may override or
  * refine those values.
  *
  * Returns 0 on success, ENXIO if the device can not be referenced, ENODEV
  * if it has no d_ioctl method, EINVAL for an unusable block size or LUN
  * size request, or the error of the sector/media size ioctl.  On most
  * failures req->error_str is filled in.
  */
 static int
 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
 {
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	struct ctl_lun_create_params *params;
 	struct cdevsw		     *csw;
 	struct cdev		     *dev;
 	char			     *value;
 	int			      error, atomic, maxio, ref, unmap, tmp;
 	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
 
 	params = &be_lun->params;
 
 	be_lun->dev_type = CTL_BE_BLOCK_DEV;
 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
 	if (csw == NULL)
 		return (ENXIO);
 	/*
 	 * atomic and maxio are byte counts here; they are converted to
 	 * blocks further down once the logical block size is known.
 	 */
 	if (strcmp(csw->d_name, "zvol") == 0) {
 		be_lun->dispatch = ctl_be_block_dispatch_zvol;
 		be_lun->get_lba_status = ctl_be_block_gls_zvol;
 		atomic = maxio = CTLBLK_MAX_IO_SIZE;
 	} else {
 		be_lun->dispatch = ctl_be_block_dispatch_dev;
 		be_lun->get_lba_status = NULL;
 		atomic = 0;
 		maxio = dev->si_iosize_max;
 		if (maxio <= 0)
 			maxio = DFLTPHYS;
 		if (maxio > CTLBLK_MAX_IO_SIZE)
 			maxio = CTLBLK_MAX_IO_SIZE;
 	}
 	be_lun->lun_flush = ctl_be_block_flush_dev;
 	be_lun->getattr = ctl_be_block_getattr_dev;
 	be_lun->unmap = ctl_be_block_unmap_dev;
 
 	/* Everything below is learned through ioctls. */
 	if (!csw->d_ioctl) {
 		dev_relthread(dev, ref);
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "no d_ioctl for device %s!", be_lun->dev_path);
 		return (ENODEV);
 	}
 
 	/* tmp receives the device's sector size. */
 	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
 			       curthread);
 	if (error) {
 		dev_relthread(dev, ref);
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "error %d returned for DIOCGSECTORSIZE ioctl "
 			 "on %s!", error, be_lun->dev_path);
 		return (error);
 	}
 
 	/*
 	 * If the user has asked for a blocksize that is greater than the
 	 * backing device's blocksize, we can do it only if the blocksize
 	 * the user is asking for is an even multiple of the underlying 
 	 * device's blocksize.
 	 */
 	if ((params->blocksize_bytes != 0) &&
 	    (params->blocksize_bytes >= tmp)) {
 		if (params->blocksize_bytes % tmp == 0) {
 			cbe_lun->blocksize = params->blocksize_bytes;
 		} else {
 			dev_relthread(dev, ref);
 			snprintf(req->error_str, sizeof(req->error_str),
 				 "requested blocksize %u is not an even "
 				 "multiple of backing device blocksize %u",
 				 params->blocksize_bytes, tmp);
 			return (EINVAL);
 		}
 	} else if (params->blocksize_bytes != 0) {
 		dev_relthread(dev, ref);
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "requested blocksize %u < backing device "
 			 "blocksize %u", params->blocksize_bytes, tmp);
 		return (EINVAL);
 	} else if (cbe_lun->lun_type == T_CDROM)
 		cbe_lun->blocksize = MAX(tmp, 2048);
 	else
 		cbe_lun->blocksize = tmp;
 
 	/* otmp receives the device's media size in bytes. */
 	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
 			     curthread);
 	if (error) {
 		dev_relthread(dev, ref);
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "error %d returned for DIOCGMEDIASIZE "
 			 " ioctl on %s!", error,
 			 be_lun->dev_path);
 		return (error);
 	}
 
 	/* A requested LUN size may shrink the device but never grow it. */
 	if (params->lun_size_bytes != 0) {
 		if (params->lun_size_bytes > otmp) {
 			dev_relthread(dev, ref);
 			snprintf(req->error_str, sizeof(req->error_str),
 				 "requested LUN size %ju > backing device "
 				 "size %ju",
 				 (uintmax_t)params->lun_size_bytes,
 				 (uintmax_t)otmp);
 			return (EINVAL);
 		}
 
 		be_lun->size_bytes = params->lun_size_bytes;
 	} else
 		be_lun->size_bytes = otmp;
 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
 	    0 : (be_lun->size_blocks - 1);
 
 	/*
 	 * Stripe size and offset provide the defaults for both the
 	 * physical and the unmap block geometry; a failing ioctl is not
 	 * fatal and just leaves the value at zero.
 	 */
 	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
 	    curthread);
 	if (error)
 		ps = po = 0;
 	else {
 		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
 		    FREAD, curthread);
 		if (error)
 			po = 0;
 	}
 	us = ps;
 	uo = po;
 
 	/*
 	 * Physical block geometry.  It is only recorded when the size is a
 	 * power-of-two multiple of the logical block size and the offset is
 	 * a whole number of logical blocks not exceeding it.
 	 */
 	value = ctl_get_opt(&cbe_lun->options, "pblocksize");
 	if (value != NULL)
 		ctl_expand_number(value, &ps);
 	value = ctl_get_opt(&cbe_lun->options, "pblockoffset");
 	if (value != NULL)
 		ctl_expand_number(value, &po);
 	pss = ps / cbe_lun->blocksize;
 	pos = po / cbe_lun->blocksize;
 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
 		cbe_lun->pblockexp = fls(pss) - 1;
 		cbe_lun->pblockoff = (pss - pos) % pss;
 	}
 
 	/* Unmap block geometry, validated the same way. */
 	value = ctl_get_opt(&cbe_lun->options, "ublocksize");
 	if (value != NULL)
 		ctl_expand_number(value, &us);
 	value = ctl_get_opt(&cbe_lun->options, "ublockoffset");
 	if (value != NULL)
 		ctl_expand_number(value, &uo);
 	uss = us / cbe_lun->blocksize;
 	uos = uo / cbe_lun->blocksize;
 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
 		cbe_lun->ublockexp = fls(uss) - 1;
 		cbe_lun->ublockoff = (uss - uos) % uss;
 	}
 
 	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
 	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
 
 	/*
 	 * Decide whether to advertise unmap: always for zvols, otherwise
 	 * according to the GEOM::candelete attribute.  An explicit "unmap"
 	 * option has the final word.
 	 */
 	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
 		unmap = 1;
 	} else {
 		struct diocgattr_arg	arg;
 
 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
 		arg.len = sizeof(arg.value.i);
 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
 		    curthread);
 		unmap = (error == 0) ? arg.value.i : 0;
 	}
 	value = ctl_get_opt(&cbe_lun->options, "unmap");
 	if (value != NULL)
 		unmap = (strcmp(value, "on") == 0);
 	if (unmap)
 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
 	else
 		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
 
 	dev_relthread(dev, ref);
 	return (0);
 }
 
 /*
  * Close the vnode backing a LUN, if one is open, and drop the per-type
  * state that was set up for it (the held credentials for a file backend).
  * Afterwards the LUN has no vnode and its type is CTL_BE_BLOCK_NONE.
  * Always returns 0.
  */
 static int
 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
 {
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	int open_mode;
 
 	/* Nothing is open: nothing to do. */
 	if (be_lun->vn == NULL)
 		return (0);
 
 	/* Close with the same access mode the vnode was opened with. */
 	open_mode = FREAD;
 	if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
 		open_mode |= FWRITE;
 	(void)vn_close(be_lun->vn, open_mode, NOCRED, curthread);
 	be_lun->vn = NULL;
 
 	if (be_lun->dev_type == CTL_BE_BLOCK_FILE) {
 		if (be_lun->backend.file.cred != NULL) {
 			crfree(be_lun->backend.file.cred);
 			be_lun->backend.file.cred = NULL;
 		}
 	} else if (be_lun->dev_type != CTL_BE_BLOCK_DEV &&
 	    be_lun->dev_type != CTL_BE_BLOCK_NONE) {
 		panic("Unexpected backend type %d", be_lun->dev_type);
 	}
 	be_lun->dev_type = CTL_BE_BLOCK_NONE;
 
 	return (0);
 }
 
 /*
  * Open the file or device named by the LUN's "file" option and configure
  * the LUN for it.
  *
  * The path is opened read/write unless the "readonly" option or the LUN
  * type says otherwise, falling back to read-only on EROFS/EACCES.  A
  * relative path that fails to open is retried once with "/dev/" prepended.
  * The opened vnode must be a disk or a regular file.  Finally the serseq
  * setting is derived from the backend kind and the "serseq" option.
  *
  * Returns 1 if the root filesystem is not mounted or no "file" option was
  * given, the vn_open() error if the path can not be opened, and 0
  * otherwise.
  * NOTE(review): a failure of the type-specific open routine closes the
  * vnode and leaves a message in req->error_str, but the function still
  * returns 0 -- confirm callers expect this.
  */
 static int
 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
 {
 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
 	struct nameidata nd;
 	char		*value;
 	int		 error, flags;
 
 	error = 0;
 	if (rootvnode == NULL) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "Root filesystem is not mounted");
 		return (1);
 	}
 	pwd_ensure_dirs();
 
 	value = ctl_get_opt(&cbe_lun->options, "file");
 	if (value == NULL) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "no file argument specified");
 		return (1);
 	}
 	/* Replace any previously remembered path with the new one. */
 	free(be_lun->dev_path, M_CTLBLK);
 	be_lun->dev_path = strdup(value, M_CTLBLK);
 
 	/*
 	 * Write access is requested unless "readonly" is "on"; without
 	 * that option only direct-access LUNs are opened for writing.
 	 */
 	flags = FREAD;
 	value = ctl_get_opt(&cbe_lun->options, "readonly");
 	if (value != NULL) {
 		if (strcmp(value, "on") != 0)
 			flags |= FWRITE;
 	} else if (cbe_lun->lun_type == T_DIRECT)
 		flags |= FWRITE;
 
 again:
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path, curthread);
 	error = vn_open(&nd, &flags, 0, NULL);
 	/* Not writable: retry the same path read-only. */
 	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
 		flags &= ~FWRITE;
 		goto again;
 	}
 	if (error) {
 		/*
 		 * This is the only reasonable guess we can make as far as
 		 * path if the user doesn't give us a fully qualified path.
 		 * If they want to specify a file, they need to specify the
 		 * full path.
 		 */
 		if (be_lun->dev_path[0] != '/') {
 			char *dev_name;
 
 			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
 				be_lun->dev_path);
 			free(be_lun->dev_path, M_CTLBLK);
 			be_lun->dev_path = dev_name;
 			goto again;
 		}
 		snprintf(req->error_str, sizeof(req->error_str),
 		    "error opening %s: %d", be_lun->dev_path, error);
 		return (error);
 	}
 	/* Record the access mode we actually ended up with. */
 	if (flags & FWRITE)
 		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
 	else
 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
 
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	be_lun->vn = nd.ni_vp;
 
 	/* We only support disks and files. */
 	if (vn_isdisk(be_lun->vn, &error)) {
 		error = ctl_be_block_open_dev(be_lun, req);
 	} else if (be_lun->vn->v_type == VREG) {
 		error = ctl_be_block_open_file(be_lun, req);
 	} else {
 		error = EINVAL;
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "%s is not a disk or plain file", be_lun->dev_path);
 	}
 	VOP_UNLOCK(be_lun->vn, 0);
 
 	if (error != 0)
 		ctl_be_block_close(be_lun);
 	/*
 	 * Default serseq: off for the plain device dispatch path, "read"
 	 * for everything else.  The "serseq" option overrides the default.
 	 */
 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
 	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
 	value = ctl_get_opt(&cbe_lun->options, "serseq");
 	if (value != NULL && strcmp(value, "on") == 0)
 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
 	else if (value != NULL && strcmp(value, "read") == 0)
 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
 	else if (value != NULL && strcmp(value, "off") == 0)
 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
 	return (0);
 }
 
 /*
  * Create a new block-backend LUN from the parameters in req->reqdata.create.
  *
  * The routine allocates and initializes a ctl_be_block_lun, opens the
  * backing store for T_DIRECT/T_CDROM LUNs (only when the LUN is primary or
  * the HA mode is CTL_HA_MODE_SER_ONLY), creates the I/O taskqueue and its
  * threads, links the LUN onto softc->lun_list, registers it with
  * ctl_add_lun() and then sleeps until CTL_BE_BLOCK_LUN_UNCONFIGURED is
  * cleared or the sleep is interrupted.
  *
  * The outcome is reported to the requester through req->status
  * (CTL_LUN_OK, CTL_LUN_WARNING when the backing store could not be opened,
  * CTL_LUN_ERROR on any bailout) and req->error_str.  The sizes, serial
  * number, device ID and LUN ID actually used are written back into the
  * create parameters.
  *
  * Returns retval: 0 on the ordinary success and error paths; the result of
  * a failed taskqueue_start_threads() or of the last msleep() is passed
  * back unchanged.  On bailout everything allocated here is released.
  */
 static int
 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
 {
 	struct ctl_be_lun *cbe_lun;
 	struct ctl_be_block_lun *be_lun;
 	struct ctl_lun_create_params *params;
 	char tmpstr[32];
 	char *value;
 	int retval, num_threads;
 	int tmp_num_threads;
 
 	params = &req->reqdata.create;
 	retval = 0;
 	req->status = CTL_LUN_OK;
 
 	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
 	cbe_lun = &be_lun->cbe_lun;
 	cbe_lun->be_lun = be_lun;
 	be_lun->params = req->reqdata.create;
 	be_lun->softc = softc;
 	STAILQ_INIT(&be_lun->input_queue);
 	STAILQ_INIT(&be_lun->config_read_queue);
 	STAILQ_INIT(&be_lun->config_write_queue);
 	STAILQ_INIT(&be_lun->datamove_queue);
 	sprintf(be_lun->lunname, "cblk%d", softc->num_luns);
 	mtx_init(&be_lun->io_lock, "cblk io lock", NULL, MTX_DEF);
 	mtx_init(&be_lun->queue_lock, "cblk queue lock", NULL, MTX_DEF);
 	ctl_init_opts(&cbe_lun->options,
 	    req->num_be_args, req->kern_be_args);
 	be_lun->lun_zone = uma_zcreate(be_lun->lunname, CTLBLK_MAX_SEG,
 	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
 	if (be_lun->lun_zone == NULL) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "error allocating UMA zone");
 		goto bailout_error;
 	}
 
 	/* Device type defaults to a direct-access device. */
 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
 		cbe_lun->lun_type = params->device_type;
 	else
 		cbe_lun->lun_type = T_DIRECT;
 	be_lun->flags = CTL_BE_BLOCK_LUN_UNCONFIGURED;
 	cbe_lun->flags = 0;
 
 	/*
 	 * An explicit "ha_role" option decides whether the LUN is primary;
 	 * without it the CTL_FLAG_ACTIVE_SHELF flag of control_softc decides.
 	 */
 	value = ctl_get_opt(&cbe_lun->options, "ha_role");
 	if (value != NULL) {
 		if (strcmp(value, "primary") == 0)
 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 
 	if (cbe_lun->lun_type == T_DIRECT ||
 	    cbe_lun->lun_type == T_CDROM) {
 		be_lun->size_bytes = params->lun_size_bytes;
 		if (params->blocksize_bytes != 0)
 			cbe_lun->blocksize = params->blocksize_bytes;
 		else if (cbe_lun->lun_type == T_CDROM)
 			cbe_lun->blocksize = 2048;
 		else
 			cbe_lun->blocksize = 512;
 		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
 		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
 		    0 : (be_lun->size_blocks - 1);
 
 		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
 		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
 			/*
 			 * A failure to open the backing store is not fatal:
 			 * the LUN is still created and the requester only
 			 * gets a warning status.
 			 */
 			retval = ctl_be_block_open(be_lun, req);
 			if (retval != 0) {
 				retval = 0;
 				req->status = CTL_LUN_WARNING;
 			}
 		}
 		num_threads = cbb_num_threads;
 	} else {
 		num_threads = 1;
 	}
 
 	value = ctl_get_opt(&cbe_lun->options, "num_threads");
 	if (value != NULL) {
 		tmp_num_threads = strtol(value, NULL, 0);
 
 		/*
 		 * We don't let the user specify less than one
 		 * thread, but hope he's clueful enough not to
 		 * specify 1000 threads.
 		 */
 		if (tmp_num_threads < 1) {
 			/* Report the option string exactly as it was given. */
 			snprintf(req->error_str, sizeof(req->error_str),
 				 "invalid number of threads %s",
 				 value);
 			goto bailout_error;
 		}
 		num_threads = tmp_num_threads;
 	}
 
 	/* No vnode means no backing store was opened above. */
 	if (be_lun->vn == NULL)
 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 	/* Tell the user the blocksize we ended up using */
 	params->lun_size_bytes = be_lun->size_bytes;
 	params->blocksize_bytes = cbe_lun->blocksize;
 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
 		cbe_lun->req_lun_id = params->req_lun_id;
 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
 	} else
 		cbe_lun->req_lun_id = 0;
 
 	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
 	cbe_lun->lun_config_status = ctl_be_block_lun_config_status;
 	cbe_lun->be = &ctl_be_block_driver;
 
 	/*
 	 * Serial number and device ID: use what the requester supplied, or
 	 * generate one from the current LUN count and report it back.
 	 */
 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%4d",
 			 softc->num_luns);
 		strncpy((char *)cbe_lun->serial_num, tmpstr,
 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
 
 		/* Tell the user what we used for a serial number */
 		strncpy((char *)params->serial_num, tmpstr,
 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
 	} else { 
 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
 			MIN(sizeof(cbe_lun->serial_num),
 			sizeof(params->serial_num)));
 	}
 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%4d", softc->num_luns);
 		strncpy((char *)cbe_lun->device_id, tmpstr,
 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
 
 		/* Tell the user what we used for a device ID */
 		strncpy((char *)params->device_id, tmpstr,
 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
 	} else {
 		strncpy((char *)cbe_lun->device_id, params->device_id,
 			MIN(sizeof(cbe_lun->device_id),
 			    sizeof(params->device_id)));
 	}
 
 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
 
 	be_lun->io_taskqueue = taskqueue_create(be_lun->lunname, M_WAITOK,
 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
 
 	if (be_lun->io_taskqueue == NULL) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "unable to create taskqueue");
 		goto bailout_error;
 	}
 
 	/*
 	 * Note that we start the same number of threads by default for
 	 * both the file case and the block device case.  For the file
 	 * case, we need multiple threads to allow concurrency, because the
 	 * vnode interface is designed to be a blocking interface.  For the
 	 * block device case, ZFS zvols at least will block the caller's
 	 * context in many instances, and so we need multiple threads to
 	 * overcome that problem.  Other block devices don't need as many
 	 * threads, but they shouldn't cause too many problems.
 	 *
 	 * If the user wants to just have a single thread for a block
 	 * device, he can specify that when the LUN is created, or change
 	 * the tunable/sysctl to alter the default number of threads.
 	 */
 	retval = taskqueue_start_threads(&be_lun->io_taskqueue,
 					 /*num threads*/num_threads,
 					 /*priority*/PWAIT,
 					 /*thread name*/
 					 "%s taskq", be_lun->lunname);
 
 	if (retval != 0)
 		goto bailout_error;
 
 	be_lun->num_threads = num_threads;
 
 	/* Publish the LUN on the backend's list before handing it to CTL. */
 	mtx_lock(&softc->lock);
 	softc->num_luns++;
 	STAILQ_INSERT_TAIL(&softc->lun_list, be_lun, links);
 
 	mtx_unlock(&softc->lock);
 
 	retval = ctl_add_lun(&be_lun->cbe_lun);
 	if (retval != 0) {
 		mtx_lock(&softc->lock);
 		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
 			      links);
 		softc->num_luns--;
 		mtx_unlock(&softc->lock);
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "ctl_add_lun() returned error %d, see dmesg for "
 			 "details", retval);
 		/* The failure is reported through req->status only. */
 		retval = 0;
 		goto bailout_error;
 	}
 
 	mtx_lock(&softc->lock);
 
 	/*
 	 * Tell the config_status routine that we're waiting so it won't
 	 * clean up the LUN in the event of an error.
 	 */
 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
 
 	while (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
 		if (retval == EINTR)
 			break;
 	}
 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
 
 	if (be_lun->flags & CTL_BE_BLOCK_LUN_CONFIG_ERR) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "LUN configuration error, see dmesg for details");
 		STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun,
 			      links);
 		softc->num_luns--;
 		mtx_unlock(&softc->lock);
 		goto bailout_error;
 	} else {
 		/* Report the LUN ID that was actually assigned. */
 		params->req_lun_id = cbe_lun->lun_id;
 	}
 
 	mtx_unlock(&softc->lock);
 
 	be_lun->disk_stats = devstat_new_entry("cbb", params->req_lun_id,
 					       cbe_lun->blocksize,
 					       DEVSTAT_ALL_SUPPORTED,
 					       cbe_lun->lun_type
 					       | DEVSTAT_TYPE_IF_OTHER,
 					       DEVSTAT_PRIORITY_OTHER);
 
 	return (retval);
 
 bailout_error:
 	/* Undo everything set up above; be_lun is zeroed at allocation. */
 	req->status = CTL_LUN_ERROR;
 
 	if (be_lun->io_taskqueue != NULL)
 		taskqueue_free(be_lun->io_taskqueue);
 	ctl_be_block_close(be_lun);
 	if (be_lun->dev_path != NULL)
 		free(be_lun->dev_path, M_CTLBLK);
 	if (be_lun->lun_zone != NULL)
 		uma_zdestroy(be_lun->lun_zone);
 	ctl_free_opts(&cbe_lun->options);
 	mtx_destroy(&be_lun->queue_lock);
 	mtx_destroy(&be_lun->io_lock);
 	free(be_lun, M_CTLBLK);
 
 	return (retval);
 }
 
 /*
  * Remove the LUN identified by req->reqdata.rm.lun_id from the block backend.
  *
  * The LUN is looked up on softc->lun_list, disabled, has its backing store
  * closed (if one is open), and is invalidated.  The routine then sleeps
  * until CTL_BE_BLOCK_LUN_UNCONFIGURED is set on the LUN before unlinking it
  * and releasing its taskqueue, devstat entry, UMA zone, options, path and
  * locks.
  *
  * Always returns 0; success or failure is reported through req->status
  * (CTL_LUN_OK / CTL_LUN_ERROR) and req->error_str.
  */
 static int
 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
 {
 	struct ctl_lun_rm_params *params;
 	struct ctl_be_block_lun *be_lun;
 	struct ctl_be_lun *cbe_lun;
 	int retval;
 
 	params = &req->reqdata.rm;
 
 	/* Find the LUN by ID; be_lun is NULL if the loop runs to the end. */
 	mtx_lock(&softc->lock);
 	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
 		if (be_lun->cbe_lun.lun_id == params->lun_id)
 			break;
 	}
 	mtx_unlock(&softc->lock);
 	if (be_lun == NULL) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "LUN %u is not managed by the block backend",
 			 params->lun_id);
 		goto bailout_error;
 	}
 	cbe_lun = &be_lun->cbe_lun;
 
 	retval = ctl_disable_lun(cbe_lun);
 	if (retval != 0) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "error %d returned from ctl_disable_lun() for "
 			 "LUN %d", retval, params->lun_id);
 		goto bailout_error;
 	}
 
 	/*
 	 * If a backing store is open, mark the LUN as having no media and
 	 * drain the I/O taskqueue before closing it.
 	 */
 	if (be_lun->vn != NULL) {
 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 		ctl_lun_no_media(cbe_lun);
 		taskqueue_drain_all(be_lun->io_taskqueue);
 		ctl_be_block_close(be_lun);
 	}
 
 	retval = ctl_invalidate_lun(cbe_lun);
 	if (retval != 0) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "error %d returned from ctl_invalidate_lun() for "
 			 "LUN %d", retval, params->lun_id);
 		goto bailout_error;
 	}
 
 	/*
 	 * Wait for the UNCONFIGURED flag, which ctl_be_block_lun_shutdown()
 	 * sets; the WAITING flag asks that routine to wake us up.
 	 */
 	mtx_lock(&softc->lock);
 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
 	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
                 retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblk", 0);
                 if (retval == EINTR)
                         break;
         }
 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
 
 	/* Still configured here means the sleep above was interrupted. */
 	if ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "interrupted waiting for LUN to be freed");
 		mtx_unlock(&softc->lock);
 		goto bailout_error;
 	}
 
 	STAILQ_REMOVE(&softc->lun_list, be_lun, ctl_be_block_lun, links);
 
 	softc->num_luns--;
 	mtx_unlock(&softc->lock);
 
 	/* The LUN is unlinked; release everything it owns. */
 	taskqueue_drain_all(be_lun->io_taskqueue);
 	taskqueue_free(be_lun->io_taskqueue);
 
 	if (be_lun->disk_stats != NULL)
 		devstat_remove_entry(be_lun->disk_stats);
 
 	uma_zdestroy(be_lun->lun_zone);
 
 	ctl_free_opts(&cbe_lun->options);
 	free(be_lun->dev_path, M_CTLBLK);
 	mtx_destroy(&be_lun->queue_lock);
 	mtx_destroy(&be_lun->io_lock);
 	free(be_lun, M_CTLBLK);
 
 	req->status = CTL_LUN_OK;
 	return (0);
 
 bailout_error:
 	req->status = CTL_LUN_ERROR;
 	return (0);
 }
 
 /*
  * Apply a modify request (req->reqdata.modify) to an existing LUN.
  *
  * Updates the stored size and backend options, re-evaluates the HA role,
  * and then either (re)opens/refreshes the backing store or closes it,
  * depending on whether the LUN is primary (or the HA mode is
  * CTL_HA_MODE_SER_ONLY).  Media-presence and capacity changes are passed on
  * through ctl_lun_has_media(), ctl_lun_no_media() and
  * ctl_lun_capacity_changed().
  *
  * Always returns 0.  req->status is CTL_LUN_ERROR if the LUN is not found,
  * CTL_LUN_WARNING if the open/close step returned an error, and CTL_LUN_OK
  * otherwise.  The resulting size is written back to params->lun_size_bytes.
  */
 static int
 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
 {
 	struct ctl_lun_modify_params *params;
 	struct ctl_be_block_lun *be_lun;
 	struct ctl_be_lun *cbe_lun;
 	char *value;
 	uint64_t oldsize;
 	int error, wasprim;
 
 	params = &req->reqdata.modify;
 
 	/* Find the LUN by ID; be_lun is NULL if the loop runs to the end. */
 	mtx_lock(&softc->lock);
 	STAILQ_FOREACH(be_lun, &softc->lun_list, links) {
 		if (be_lun->cbe_lun.lun_id == params->lun_id)
 			break;
 	}
 	mtx_unlock(&softc->lock);
 	if (be_lun == NULL) {
 		snprintf(req->error_str, sizeof(req->error_str),
 			 "LUN %u is not managed by the block backend",
 			 params->lun_id);
 		goto bailout_error;
 	}
 	cbe_lun = &be_lun->cbe_lun;
 
 	/* A size of 0 in the request leaves the stored size untouched. */
 	if (params->lun_size_bytes != 0)
 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
 	ctl_update_opts(&cbe_lun->options, req->num_be_args, req->kern_be_args);
 
 	/*
 	 * Recompute the primary flag: an explicit "ha_role" option wins,
 	 * otherwise the CTL_FLAG_ACTIVE_SHELF flag of control_softc decides.
 	 * CTL is told only when the role actually changed.
 	 */
 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
 	value = ctl_get_opt(&cbe_lun->options, "ha_role");
 	if (value != NULL) {
 		if (strcmp(value, "primary") == 0)
 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 		else
 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
 	else
 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
 			ctl_lun_primary(cbe_lun);
 		else
 			ctl_lun_secondary(cbe_lun);
 	}
 
 	/* Remember the size so a capacity change can be detected below. */
 	oldsize = be_lun->size_blocks;
 	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
 	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
 		/*
 		 * The backing store should be open: open it if it is not,
 		 * otherwise re-run the device- or file-specific open routine
 		 * on the existing vnode.
 		 */
 		if (be_lun->vn == NULL)
 			error = ctl_be_block_open(be_lun, req);
 		else if (vn_isdisk(be_lun->vn, &error))
 			error = ctl_be_block_open_dev(be_lun, req);
 		else if (be_lun->vn->v_type == VREG) {
 			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
 			error = ctl_be_block_open_file(be_lun, req);
 			VOP_UNLOCK(be_lun->vn, 0);
 		} else
 			error = EINVAL;
 		/* Bring the NO_MEDIA flag in line with whether a vnode exists. */
 		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
 		    be_lun->vn != NULL) {
 			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
 			ctl_lun_has_media(cbe_lun);
 		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
 		    be_lun->vn == NULL) {
 			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 			ctl_lun_no_media(cbe_lun);
 		}
 		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
 	} else {
 		/*
 		 * The backing store should be closed: report no media and
 		 * drain the I/O taskqueue before closing it.
 		 */
 		if (be_lun->vn != NULL) {
 			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 			ctl_lun_no_media(cbe_lun);
 			taskqueue_drain_all(be_lun->io_taskqueue);
 			error = ctl_be_block_close(be_lun);
 		} else
 			error = 0;
 	}
 	if (be_lun->size_blocks != oldsize)
 		ctl_lun_capacity_changed(cbe_lun);
 
 	/* Tell the user the exact size we ended up using */
 	params->lun_size_bytes = be_lun->size_bytes;
 
 	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
 	return (0);
 
 bailout_error:
 	req->status = CTL_LUN_ERROR;
 	return (0);
 }
 
 /*
  * LUN shutdown callback (installed as cbe_lun->lun_shutdown).
  *
  * Marks the LUN as unconfigured under the softc lock and wakes up a
  * thread sleeping on the LUN if one has flagged itself as waiting.
  */
 static void
 ctl_be_block_lun_shutdown(void *be_lun)
 {
 	struct ctl_be_block_lun *blk_lun = be_lun;
 	struct ctl_be_block_softc *sc = blk_lun->softc;
 
 	mtx_lock(&sc->lock);
 	blk_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
 	if ((blk_lun->flags & CTL_BE_BLOCK_LUN_WAITING) != 0)
 		wakeup(blk_lun);
 	mtx_unlock(&sc->lock);
 }
 
 /*
  * LUN configuration callback (installed as cbe_lun->lun_config_status).
  *
  * In every case the UNCONFIGURED flag is cleared under the softc lock.
  * On failure the CONFIG_ERR flag is also set and any sleeper is woken
  * unconditionally.  On success a sleeper is woken only if it flagged
  * itself as waiting, and the LUN is then enabled; if enabling fails the
  * LUN is invalidated again.
  */
 static void
 ctl_be_block_lun_config_status(void *be_lun, ctl_lun_config_status status)
 {
 	struct ctl_be_block_lun *blk_lun = be_lun;
 	struct ctl_be_block_softc *sc = blk_lun->softc;
 
 	if (status != CTL_LUN_CONFIG_OK) {
 		/* Configuration failed: record the error for the waiter. */
 		mtx_lock(&sc->lock);
 		blk_lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
 		blk_lun->flags |= CTL_BE_BLOCK_LUN_CONFIG_ERR;
 		wakeup(blk_lun);
 		mtx_unlock(&sc->lock);
 		return;
 	}
 
 	mtx_lock(&sc->lock);
 	blk_lun->flags &= ~CTL_BE_BLOCK_LUN_UNCONFIGURED;
 	if ((blk_lun->flags & CTL_BE_BLOCK_LUN_WAITING) != 0)
 		wakeup(blk_lun);
 	mtx_unlock(&sc->lock);
 
 	/*
 	 * The LUN was added successfully, so try to enable it now.
 	 */
 	if (ctl_enable_lun(&blk_lun->cbe_lun) == 0)
 		return;
 
 	printf("%s: ctl_enable_lun() failed!\n", __func__);
 	if (ctl_invalidate_lun(&blk_lun->cbe_lun) != 0)
 		printf("%s: ctl_invalidate_lun() failed!\n", __func__);
 }
 
 
 /*
  * Handle a configuration-write SCSI command for a block-backend LUN.
  *
  * The LUN is taken from io->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].
  * Cache-sync, WRITE SAME and UNMAP commands are queued on
  * config_write_queue for the I/O taskqueue.  START STOP UNIT and
  * PREVENT ALLOW are completed inline with success status.  Any other
  * opcode is completed with an invalid-opcode error.
  *
  * Returns retval, which starts at 0 and is set explicitly only in the
  * default case (CTL_RETVAL_COMPLETE) and by the ctl_be_block_open() call
  * in the START STOP UNIT path.
  */
 static int
 ctl_be_block_config_write(union ctl_io *io)
 {
 	struct ctl_be_block_lun *be_lun;
 	struct ctl_be_lun *cbe_lun;
 	int retval;
 
 	DPRINTF("entered\n");
 
 	cbe_lun = (struct ctl_be_lun *)io->io_hdr.ctl_private[
 		CTL_PRIV_BACKEND_LUN].ptr;
 	be_lun = (struct ctl_be_block_lun *)cbe_lun->be_lun;
 
 	retval = 0;
 	switch (io->scsiio.cdb[0]) {
 	case SYNCHRONIZE_CACHE:
 	case SYNCHRONIZE_CACHE_16:
 	case WRITE_SAME_10:
 	case WRITE_SAME_16:
 	case UNMAP:
 		/*
 		 * The upper level CTL code will filter out any CDBs with
 		 * the immediate bit set and return the proper error.
 		 *
 		 * We don't really need to worry about what LBA range the
 		 * user asked to be synced out.  When they issue a sync
 		 * cache command, we'll sync out the whole thing.
 		 */
 		mtx_lock(&be_lun->queue_lock);
 		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
 				   links);
 		mtx_unlock(&be_lun->queue_lock);
 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
 		break;
 	case START_STOP_UNIT: {
 		struct scsi_start_stop_unit *cdb;
 		/* Only passed to ctl_be_block_open(); never read here. */
 		struct ctl_lun_req req;
 
 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
 		/* Any non-zero power condition is acknowledged and ignored. */
 		if ((cdb->how & SSS_PC_MASK) != 0) {
 			ctl_set_success(&io->scsiio);
 			ctl_config_write_done(io);
 			break;
 		}
 		if (cdb->how & SSS_START) {
 			/*
 			 * Start with the load/eject bit and no open backing
 			 * store: try to open it and update the media state
 			 * according to the result.
 			 *
 			 * NOTE(review): retval now carries the result of
 			 * ctl_be_block_open() and is what this function
 			 * returns, although the I/O is completed with
 			 * success below - confirm this is intended.
 			 */
 			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
 				retval = ctl_be_block_open(be_lun, &req);
 				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
 				if (retval == 0) {
 					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
 					ctl_lun_has_media(cbe_lun);
 				} else {
 					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 					ctl_lun_no_media(cbe_lun);
 				}
 			}
 			ctl_start_lun(cbe_lun);
 		} else {
 			ctl_stop_lun(cbe_lun);
 			/*
 			 * Stop with the load/eject bit: mark the media as
 			 * ejected and close the backing store if it is open.
 			 */
 			if (cdb->how & SSS_LOEJ) {
 				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
 				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
 				ctl_lun_ejected(cbe_lun);
 				if (be_lun->vn != NULL)
 					ctl_be_block_close(be_lun);
 			}
 		}
 
 		ctl_set_success(&io->scsiio);
 		ctl_config_write_done(io);
 		break;
 	}
 	case PREVENT_ALLOW:
 		/* Nothing to do here; just report success. */
 		ctl_set_success(&io->scsiio);
 		ctl_config_write_done(io);
 		break;
 	default:
 		ctl_set_invalid_opcode(&io->scsiio);
 		ctl_config_write_done(io);
 		retval = CTL_RETVAL_COMPLETE;
 		break;
 	}
 
 	return (retval);
 }
 
 /*
  * Handle a configuration-read SCSI command for a block-backend LUN.
  *
  * Only SERVICE ACTION IN with the SGLS_SERVICE_ACTION service action is
  * accepted; it is queued on config_read_queue for the I/O taskqueue and
  * CTL_RETVAL_QUEUED is returned.  Any other service action is completed
  * with an invalid-field error and any other opcode with an invalid-opcode
  * error; both return CTL_RETVAL_COMPLETE.
  */
 static int
 ctl_be_block_config_read(union ctl_io *io)
 {
 	struct ctl_be_lun *ctl_lun;
 	struct ctl_be_block_lun *blk_lun;
 
 	DPRINTF("entered\n");
 
 	ctl_lun = (struct ctl_be_lun *)
 	    io->io_hdr.ctl_private[CTL_PRIV_BACKEND_LUN].ptr;
 	blk_lun = (struct ctl_be_block_lun *)ctl_lun->be_lun;
 
 	/* Anything but SERVICE ACTION IN is not supported here. */
 	if (io->scsiio.cdb[0] != SERVICE_ACTION_IN) {
 		ctl_set_invalid_opcode(&io->scsiio);
 		ctl_config_read_done(io);
 		return (CTL_RETVAL_COMPLETE);
 	}
 
 	/* Reject unknown service actions, pointing at CDB byte 1, bit 4. */
 	if (io->scsiio.cdb[1] != SGLS_SERVICE_ACTION) {
 		ctl_set_invalid_field(&io->scsiio,
 				      /*sks_valid*/ 1,
 				      /*command*/ 1,
 				      /*field*/ 1,
 				      /*bit_valid*/ 1,
 				      /*bit*/ 4);
 		ctl_config_read_done(io);
 		return (CTL_RETVAL_COMPLETE);
 	}
 
 	/* Hand the request over to the worker threads. */
 	mtx_lock(&blk_lun->queue_lock);
 	STAILQ_INSERT_TAIL(&blk_lun->config_read_queue, &io->io_hdr, links);
 	mtx_unlock(&blk_lun->queue_lock);
 	taskqueue_enqueue(blk_lun->io_taskqueue, &blk_lun->io_task);
 
 	return (CTL_RETVAL_QUEUED);
 }
 
 /*
  * Append the backend-specific LUN information to sb: the number of worker
  * threads wrapped in a <num_threads> element.
  *
  * The element is emitted with three sbuf_printf() calls; output stops at
  * the first call that fails and its result is returned.  Returns 0 when
  * all three succeed.
  */
 static int
 ctl_be_block_lun_info(void *be_lun, struct sbuf *sb)
 {
 	struct ctl_be_block_lun *blk_lun = be_lun;
 	int rc;
 
 	rc = sbuf_printf(sb, "\t<num_threads>");
 	if (rc == 0)
 		rc = sbuf_printf(sb, "%d", blk_lun->num_threads);
 	if (rc == 0)
 		rc = sbuf_printf(sb, "</num_threads>\n");
 
 	return (rc);
 }
 
 /*
  * Look up the attribute attrname through the LUN's getattr method.
  * Returns UINT64_MAX when the LUN has no getattr method installed.
  */
 static uint64_t
 ctl_be_block_lun_attr(void *be_lun, const char *attrname)
 {
 	struct ctl_be_block_lun *blk_lun = be_lun;
 
 	if (blk_lun->getattr != NULL)
 		return (blk_lun->getattr(blk_lun, attrname));
 
 	return (UINT64_MAX);
 }
 
 /*
  * Initialize the block backend's global state: the softc lock, the UMA
  * zone for ctl_be_block_io structures, and the empty LUN list.
  * Always returns 0.
  */
 int
 ctl_be_block_init(void)
 {
 	struct ctl_be_block_softc *sc = &backend_block_softc;
 
 	mtx_init(&sc->lock, "ctlblock", NULL, MTX_DEF);
 	beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	STAILQ_INIT(&sc->lun_list);
 
 	return (0);
 }
Index: head/sys/compat/linux/linux_dtrace.h
===================================================================
--- head/sys/compat/linux/linux_dtrace.h	(revision 292383)
+++ head/sys/compat/linux/linux_dtrace.h	(revision 292384)
@@ -1,90 +1,90 @@
 /*-
  * Copyright (c) 2008-2012 Alexander Leidinger <netchild@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _LINUX_DTRACE_H_
 #define _LINUX_DTRACE_H_
 
 /**
  * DTrace support macros for the linuxulator.
  *
  * Some wrapper macros to make it easier to handle the linuxulator
  * providers and to allow the name to depend upon the bitsize.
  *
  * Basically this is the same as the normal SDT macros in sys/sdt.h. The
  * difference is that the provider name is automatically inserted, and
  * we do not use a different name for the probe-description.
  */
 
 /* Provider definition/declaration: passed straight through to SDT. */
 #define	LIN_SDT_PROVIDER_DEFINE(x)	SDT_PROVIDER_DEFINE(x)
 #define LIN_SDT_PROVIDER_DECLARE(x)	SDT_PROVIDER_DECLARE(x)
 
 /*
  * Probe declaration.  The public macro inserts LINUX_DTRACE as the
  * provider and goes through an underscore-prefixed helper - presumably so
  * that LINUX_DTRACE is macro-expanded before it reaches the SDT macro.
  */
 #define	_LIN_SDT_PROBE_DECLARE(a, b, c, d)	SDT_PROBE_DECLARE(a, b, c, d)
 #define	LIN_SDT_PROBE_DECLARE(a, b, c)		_LIN_SDT_PROBE_DECLARE( \
     LINUX_DTRACE, a, b, c)
 
 /*
  * Probe definitions with 0 to 5 argument types.  Each LIN_SDT_PROBE_DEFINEn
  * prepends LINUX_DTRACE and forwards to the matching SDT_PROBE_DEFINEn
  * (SDT_PROBE_DEFINE for n = 0) through its underscore-prefixed helper.
  */
 #define	_LIN_SDT_PROBE_DEFINE0(a, b, c, d)		SDT_PROBE_DEFINE(a, \
     b, c, d)
 #define	LIN_SDT_PROBE_DEFINE0(a, b, c)			_LIN_SDT_PROBE_DEFINE0(\
     LINUX_DTRACE, a, b, c)
 #define	_LIN_SDT_PROBE_DEFINE1(a, b, c, d, e)		SDT_PROBE_DEFINE1(a, \
     b, c, d, e)
 #define	LIN_SDT_PROBE_DEFINE1(a, b, c, d)		_LIN_SDT_PROBE_DEFINE1(\
     LINUX_DTRACE, a, b, c, d)
 #define	_LIN_SDT_PROBE_DEFINE2(a, b, c, d, e, f)	SDT_PROBE_DEFINE2(a, \
     b, c, d, e, f)
 #define	LIN_SDT_PROBE_DEFINE2(a, b, c, d, e)		_LIN_SDT_PROBE_DEFINE2(\
     LINUX_DTRACE, a, b, c, d, e)
 #define	_LIN_SDT_PROBE_DEFINE3(a, b, c, d, e, f, g)	SDT_PROBE_DEFINE3(a, \
     b, c, d, e, f, g)
 #define	LIN_SDT_PROBE_DEFINE3(a, b, c, d, e, f)		_LIN_SDT_PROBE_DEFINE3(\
     LINUX_DTRACE, a, b, c, d, e, f)
 #define	_LIN_SDT_PROBE_DEFINE4(a, b, c, d, e, f, g, h)	SDT_PROBE_DEFINE4(a, \
     b, c, d, e, f, g, h)
 #define	LIN_SDT_PROBE_DEFINE4(a, b, c, d, e, f, g)	_LIN_SDT_PROBE_DEFINE4(\
     LINUX_DTRACE, a, b, c, d, e, f, g)
 #define	_LIN_SDT_PROBE_DEFINE5(a, b, c, d, e, f, g, h, i) \
     SDT_PROBE_DEFINE5(a, b, c, d, e, f, g, h, i)
 #define	LIN_SDT_PROBE_DEFINE5(a, b, c, d, e, f, g, h)	_LIN_SDT_PROBE_DEFINE5(\
     LINUX_DTRACE, a, b, c, d, e, f, g, h)
 
-#define	LIN_SDT_PROBE0(a, b, c)			SDT_PROBE1(LINUX_DTRACE, a, b, \
-    c, 0)
+#define	LIN_SDT_PROBE0(a, b, c)			SDT_PROBE0(LINUX_DTRACE, a, b, \
+    c)
 /*
  * Probe firing with 1 to 5 arguments.  LIN_SDT_PROBE1..4 pass LINUX_DTRACE
  * directly to the matching SDT_PROBEn; LIN_SDT_PROBE5 goes through the
  * _LIN_SDT_PROBE5 helper first.
  */
 #define	LIN_SDT_PROBE1(a, b, c, d)		SDT_PROBE1(LINUX_DTRACE, a, b, \
     c, d)
 #define	LIN_SDT_PROBE2(a, b, c, d, e)		SDT_PROBE2(LINUX_DTRACE, a, b, \
     c, d, e)
 #define	LIN_SDT_PROBE3(a, b, c, d, e, f)	SDT_PROBE3(LINUX_DTRACE, a, b, \
     c, d, e, f)
 #define	LIN_SDT_PROBE4(a, b, c, d, e, f, g)	SDT_PROBE4(LINUX_DTRACE, a, b, \
     c, d, e, f, g)
 #define	_LIN_SDT_PROBE5(a, b, c, d, e, f, g, h, i)	SDT_PROBE5(a, b, c, d, \
     e, f, g, h, i)
 #define	LIN_SDT_PROBE5(a, b, c, d, e, f, g, h)	_LIN_SDT_PROBE5(LINUX_DTRACE, \
     a, b, c, d, e, f, g, h)
 
 #endif /* _LINUX_DTRACE_H_ */
Index: head/sys/kern/kern_exec.c
===================================================================
--- head/sys/kern/kern_exec.c	(revision 292383)
+++ head/sys/kern/kern_exec.c	(revision 292384)
@@ -1,1618 +1,1618 @@
 /*-
  * Copyright (c) 1993, David Greenman
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_ktrace.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/mount.h>
 #include <sys/filedesc.h>
 #include <sys/fcntl.h>
 #include <sys/acct.h>
 #include <sys/exec.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/wait.h>
 #include <sys/malloc.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/pioctl.h>
 #include <sys/namei.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/sf_buf.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/shm.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 
 #ifdef	HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 #include <machine/reg.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 dtrace_execexit_func_t	dtrace_fasttrap_exec;
 #endif
 
 SDT_PROVIDER_DECLARE(proc);
-SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *");
-SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int");
-SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
+SDT_PROBE_DEFINE1(proc, , , exec, "char *");
+SDT_PROBE_DEFINE1(proc, , , exec__failure, "int");
+SDT_PROBE_DEFINE1(proc, , , exec__success, "char *");
 
 /* Malloc type M_PARGS, described as "Process arguments". */
 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
 
 /* kern.coredump_pack_fileinfo: read/write tunable, enabled by default. */
 int coredump_pack_fileinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
     &coredump_pack_fileinfo, 0,
     "Enable file path packing in 'procstat -f' coredump notes");
 
 /* kern.coredump_pack_vmmapinfo: read/write tunable, enabled by default. */
 int coredump_pack_vmmapinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
     &coredump_pack_vmmapinfo, 0,
     "Enable file path packing in 'procstat -v' coredump notes");
 
 /* Forward declarations for the sysctl handlers below and for do_execve(). */
 static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
 static int do_execve(struct thread *td, struct image_args *args,
     struct mac *mac_p);
 
 /* Read-only sysctls served by the handlers declared above. */
 /* XXX This should be vm_size_t. */
 SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD,
     NULL, 0, sysctl_kern_ps_strings, "LU", "");
 
 /* XXX This should be vm_size_t. */
 SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD|
     CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", "");
 
 SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD,
     NULL, 0, sysctl_kern_stackprot, "I", "");
 
 /* kern.ps_arg_cache_limit: read/write, initialized to PAGE_SIZE / 16. */
 u_long ps_arg_cache_limit = PAGE_SIZE / 16;
 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 
     &ps_arg_cache_limit, 0, "");
 
 /* kern.disallow_high_osrel: read/write, zero-initialized (static). */
 static int disallow_high_osrel;
 SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW,
     &disallow_high_osrel, 0,
     "Disallow execution of binaries built for higher version of the world");
 
 /* security.bsd.map_at_zero: read/write tunable, disabled by default. */
 static int map_at_zero = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0,
     "Permit processes to map an object at virtual address 0.");
 
 static int
 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	int error;
 
 	p = curproc;
 #ifdef SCTL_MASK32
 	if (req->flags & SCTL_MASK32) {
 		unsigned int val;
 		val = (unsigned int)p->p_sysent->sv_psstrings;
 		error = SYSCTL_OUT(req, &val, sizeof(val));
 	} else
 #endif
 		error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings,
 		   sizeof(p->p_sysent->sv_psstrings));
 	return error;
 }
 
 static int
 sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	int error;
 
 	p = curproc;
 #ifdef SCTL_MASK32
 	if (req->flags & SCTL_MASK32) {
 		unsigned int val;
 		val = (unsigned int)p->p_sysent->sv_usrstack;
 		error = SYSCTL_OUT(req, &val, sizeof(val));
 	} else
 #endif
 		error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack,
 		    sizeof(p->p_sysent->sv_usrstack));
 	return error;
 }
 
 static int
 sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 
 	p = curproc;
 	return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
 	    sizeof(p->p_sysent->sv_stackprot)));
 }
 
 /*
  * Each of the items is a pointer to a `const struct execsw', hence the
  * double pointer here.
  */
 static const struct execsw **execsw;
 
 /*
  * Argument layout of sys_execve(); compiled only when _SYS_SYSPROTO_H_ is
  * not defined (presumably <sys/sysproto.h> normally supplies it).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct execve_args {
 	char    *fname; 	/* path handed to exec_copyin_args() */
 	char    **argv;		/* argument vector */
 	char    **envv; 	/* environment vector */
 };
 #endif
 
 int
 sys_execve(struct thread *td, struct execve_args *uap)
 {
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &args, NULL);
 	post_execve(td, error, oldvmspace);
 	return (error);
 }
 
 /*
  * Argument layout of sys_fexecve(); compiled only when _SYS_SYSPROTO_H_ is
  * not defined (presumably <sys/sysproto.h> normally supplies it).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fexecve_args {
 	int	fd;		/* descriptor of the image to execute */
 	char	**argv;		/* argument vector */
 	char	**envv;		/* environment vector */
 };
 #endif
 int
 sys_fexecve(struct thread *td, struct fexecve_args *uap)
 {
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, NULL, UIO_SYSSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0) {
 		args.fd = uap->fd;
 		error = kern_execve(td, &args, NULL);
 	}
 	post_execve(td, error, oldvmspace);
 	return (error);
 }
 
 /*
  * Argument layout of sys___mac_execve(); compiled only when
  * _SYS_SYSPROTO_H_ is not defined (presumably <sys/sysproto.h> normally
  * supplies it).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct __mac_execve_args {
 	char	*fname;		/* path handed to exec_copyin_args() */
 	char	**argv;		/* argument vector */
 	char	**envv;		/* environment vector */
 	struct mac	*mac_p;	/* MAC label handed to kern_execve() */
 };
 #endif
 
 /*
  * __mac_execve system call entry point.
  *
  * With MAC compiled in this follows the same flow as sys_execve() but
  * passes the caller-supplied label uap->mac_p to kern_execve().  Without
  * MAC it returns ENOSYS.
  */
 int
 sys___mac_execve(struct thread *td, struct __mac_execve_args *uap)
 {
 #ifndef MAC
 	return (ENOSYS);
 #else
 	struct image_args iargs;
 	struct vmspace *saved_vm;
 	int rc;
 
 	rc = pre_execve(td, &saved_vm);
 	if (rc == 0) {
 		rc = exec_copyin_args(&iargs, uap->fname, UIO_USERSPACE,
 		    uap->argv, uap->envv);
 		if (rc == 0)
 			rc = kern_execve(td, &iargs, uap->mac_p);
 		post_execve(td, rc, saved_vm);
 	}
 	return (rc);
 #endif
 }
 
 /*
  * Prepare the current thread for an exec attempt.  If the process has
  * ever had more than one thread (P_HADTHREADS), request SINGLE_BOUNDARY
  * single-threading; ERESTART is returned when that request fails.  The
  * current vmspace is stored in *oldvmspace for post_execve().
  */
 int
 pre_execve(struct thread *td, struct vmspace **oldvmspace)
 {
 	struct proc *p;
 	int rv;
 
 	KASSERT(td == curthread, ("non-current thread %p", td));
 	p = td->td_proc;
 	rv = 0;
 	if ((p->p_flag & P_HADTHREADS) != 0) {
 		PROC_LOCK(p);
 		if (thread_single(p, SINGLE_BOUNDARY) != 0)
 			rv = ERESTART;
 		PROC_UNLOCK(p);
 	}
 	KASSERT(rv != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0,
 	    ("nested execve"));
 	*oldvmspace = p->p_vmspace;
 	return (rv);
 }
 
 /*
  * Finish an exec attempt started with pre_execve().  'error' is the
  * result of the attempt and 'oldvmspace' the vmspace that pre_execve()
  * recorded.
  */
 void
 post_execve(struct thread *td, int error, struct vmspace *oldvmspace)
 {
 	struct proc *p;
 
 	KASSERT(td == curthread, ("non-current thread %p", td));
 	p = td->td_proc;
 	if ((p->p_flag & P_HADTHREADS) != 0) {
 		PROC_LOCK(p);
 		if (error != 0) {
 			/*
 			 * The exec failed: end the SINGLE_BOUNDARY
 			 * single-threading requested by pre_execve().
 			 */
 			thread_single_end(p, SINGLE_BOUNDARY);
 		} else {
 			/*
 			 * The exec succeeded: upgrade to SINGLE_EXIT to
 			 * force the other threads to suicide.
 			 */
 			thread_single(p, SINGLE_EXIT);
 		}
 		PROC_UNLOCK(p);
 	}
 
 	/* Nothing more to do unless the exec switched to a new vmspace. */
 	if ((td->td_pflags & TDP_EXECVMSPC) == 0)
 		return;
 
 	KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used"));
 	vmspace_free(oldvmspace);
 	td->td_pflags &= ~TDP_EXECVMSPC;
 }
 
 /*
  * Record the argument and environment strings through the AUDIT_ARG_*()
  * macros, then run the exec via do_execve() and return its result.
  *
  * XXX: kern_execve has the astonishing property of not always returning to
  * the caller.  If sufficiently bad things happen during the call to
  * do_execve(), it can end up calling exit1(); as a result, callers must
  * avoid doing anything which they might need to undo (e.g., allocating
  * memory).
  */
 int
 kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p)
 {
 
 	/* The third argument is the byte length of each string area. */
 	AUDIT_ARG_ARGV(args->begin_argv, args->argc,
 	    args->begin_envv - args->begin_argv);
 	AUDIT_ARG_ENVV(args->begin_envv, args->envc,
 	    args->endp - args->begin_envv);
 	return (do_execve(td, args, mac_p));
 }
 
 /*
  * In-kernel implementation of execve().
  *
  * 'args' describes the image to run (args->fname, or args->fd when fname
  * is NULL) together with the argument/environment strings.  The path is
  * looked up with UIO_SYSSPACE and the strings are read with bcopy(), so
  * 'args' must already live in kernel memory.  'mac_p' is handed to
  * mac_execve_enter() when MAC is compiled in.
  *
  * Returns 0 on success or an errno value.  'args' is released with
  * exec_free_args() on every path.  If the exec fails after the old
  * address space has been destroyed (imgp->vmspace_destroyed), the
  * function calls exit1() instead of returning.
  */
 static int
 do_execve(td, args, mac_p)
 	struct thread *td;
 	struct image_args *args;
 	struct mac *mac_p;
 {
 	struct proc *p = td->td_proc;
 	struct nameidata nd;
 	struct ucred *newcred = NULL, *oldcred;
 	struct uidinfo *euip = NULL;
 	register_t *stack_base;
 	int error, i;
 	struct image_params image_params, *imgp;
 	struct vattr attr;
 	int (*img_first)(struct image_params *);
 	struct pargs *oldargs = NULL, *newargs = NULL;
 	struct sigacts *oldsigacts, *newsigacts;
 #ifdef KTRACE
 	struct vnode *tracevp = NULL;
 	struct ucred *tracecred = NULL;
 #endif
 	struct vnode *oldtextvp = NULL, *newtextvp;
 	cap_rights_t rights;
 	int credential_changing;
 	int textset;
 #ifdef MAC
 	struct label *interpvplabel = NULL;
 	int will_transition;
 #endif
 #ifdef HWPMC_HOOKS
 	struct pmckern_procexec pe;
 #endif
 	static const char fexecv_proc_title[] = "(fexecv)";
 
 	imgp = &image_params;
 
 	/*
 	 * Lock the process and set the P_INEXEC flag to indicate that
 	 * it should be left alone until we're done here.  This is
 	 * necessary to avoid race conditions - e.g. in ptrace() -
 	 * that might allow a local user to illicitly obtain elevated
 	 * privileges.
 	 */
 	PROC_LOCK(p);
 	KASSERT((p->p_flag & P_INEXEC) == 0,
 	    ("%s(): process already has P_INEXEC flag", __func__));
 	p->p_flag |= P_INEXEC;
 	PROC_UNLOCK(p);
 
 	/*
 	 * Initialize part of the common data
 	 */
 	bzero(imgp, sizeof(*imgp));
 	imgp->proc = p;
 	imgp->attr = &attr;
 	imgp->args = args;
 
 #ifdef MAC
 	error = mac_execve_enter(imgp, mac_p);
 	if (error)
 		goto exec_fail;
 #endif
 
 	/*
 	 * Translate the file name. namei() returns a vnode pointer
 	 *	in ni_vp among other things.
 	 *
 	 * XXXAUDIT: It would be desirable to also audit the name of the
 	 * interpreter if this is an interpreted binary.
 	 */
 	if (args->fname != NULL) {
 		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME
 		    | AUDITVNODE1, UIO_SYSSPACE, args->fname, td);
 	}
 
-	SDT_PROBE1(proc, kernel, , exec, args->fname);
+	SDT_PROBE1(proc, , , exec, args->fname);
 
 	/*
 	 * Re-entered from the interpreter handling below once nd and
 	 * args->fname have been pointed at the interpreter.
 	 */
 interpret:
 	if (args->fname != NULL) {
 #ifdef CAPABILITY_MODE
 		/*
 		 * While capability mode can't reach this point via direct
 		 * path arguments to execve(), we also don't allow
 		 * interpreters to be used in capability mode (for now).
 		 * Catch indirect lookups and return a permissions error.
 		 */
 		if (IN_CAPABILITY_MODE(td)) {
 			error = ECAPMODE;
 			goto exec_fail;
 		}
 #endif
 		error = namei(&nd);
 		if (error)
 			goto exec_fail;
 
 		newtextvp = nd.ni_vp;
 		imgp->vp = newtextvp;
 	} else {
 		AUDIT_ARG_FD(args->fd);
 		/*
 		 * Descriptors opened only with O_EXEC or O_RDONLY are allowed.
 		 */
 		error = fgetvp_exec(td, args->fd,
 		    cap_rights_init(&rights, CAP_FEXECVE), &newtextvp);
 		if (error)
 			goto exec_fail;
 		vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY);
 		AUDIT_ARG_VNODE1(newtextvp);
 		imgp->vp = newtextvp;
 	}
 
 	/*
 	 * Check file permissions (also 'opens' file)
 	 */
 	error = exec_check_permissions(imgp);
 	if (error)
 		goto exec_fail_dealloc;
 
 	imgp->object = imgp->vp->v_object;
 	if (imgp->object != NULL)
 		vm_object_reference(imgp->object);
 
 	/*
 	 * Set VV_TEXT now so no one can write to the executable while we're
 	 * activating it.
 	 *
 	 * Remember if this was set before and unset it in case this is not
 	 * actually an executable image.
 	 */
 	textset = VOP_IS_TEXT(imgp->vp);
 	VOP_SET_TEXT(imgp->vp);
 
 	error = exec_map_first_page(imgp);
 	if (error)
 		goto exec_fail_dealloc;
 
 	imgp->proc->p_osrel = 0;
 	/*
 	 *	If the current process has a special image activator it
 	 *	wants to try first, call it.   For example, emulating shell
 	 *	scripts differently.
 	 */
 	/* -1 means "no activator has matched yet"; see the loop below. */
 	error = -1;
 	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
 		error = img_first(imgp);
 
 	/*
 	 *	Loop through the list of image activators, calling each one.
 	 *	An activator returns -1 if there is no match, 0 on success,
 	 *	and an error otherwise.
 	 */
 	for (i = 0; error == -1 && execsw[i]; ++i) {
 		if (execsw[i]->ex_imgact == NULL ||
 		    execsw[i]->ex_imgact == img_first) {
 			continue;
 		}
 		error = (*execsw[i]->ex_imgact)(imgp);
 	}
 
 	if (error) {
 		if (error == -1) {
 			if (textset == 0)
 				VOP_UNSET_TEXT(imgp->vp);
 			error = ENOEXEC;
 		}
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Special interpreter operation, cleanup and loop up to try to
 	 * activate the interpreter.
 	 */
 	if (imgp->interpreted) {
 		exec_unmap_first_page(imgp);
 		/*
 		 * VV_TEXT needs to be unset for scripts.  There is a short
 		 * period before we determine that something is a script where
 		 * VV_TEXT will be set. The vnode lock is held over this
 		 * entire period so nothing should illegitimately be blocked.
 		 */
 		VOP_UNSET_TEXT(imgp->vp);
 		/* free name buffer and old vnode */
 		if (args->fname != NULL)
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 #ifdef MAC
 		mac_execve_interpreter_enter(newtextvp, &interpvplabel);
 #endif
 		if (imgp->opened) {
 			VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td);
 			imgp->opened = 0;
 		}
 		vput(newtextvp);
 		vm_object_deallocate(imgp->object);
 		imgp->object = NULL;
 		/* set new name to that of the interpreter */
 		NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME,
 		    UIO_SYSSPACE, imgp->interpreter_name, td);
 		args->fname = imgp->interpreter_name;
 		goto interpret;
 	}
 
 	/*
 	 * NB: We unlock the vnode here because it is believed that none
 	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
 	 */
 	VOP_UNLOCK(imgp->vp, 0);
 
 	/*
 	 * Do the best to calculate the full path to the image file.
 	 */
 	if (imgp->auxargs != NULL &&
 	    ((args->fname != NULL && args->fname[0] == '/') ||
 	     vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0))
 		imgp->execpath = args->fname;
 
 	if (disallow_high_osrel &&
 	    P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) {
 		error = ENOEXEC;
 		uprintf("Osrel %d for image %s too high\n", p->p_osrel,
 		    imgp->execpath != NULL ? imgp->execpath : "<unresolved>");
 		/* exec_fail_dealloc expects the vnode to be locked. */
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/* ABI enforces the use of Capsicum. Switch into capabilities mode. */
 	if (SV_PROC_FLAG(p, SV_CAPSICUM))
 		sys_cap_enter(td, NULL);
 
 	/*
 	 * Copy out strings (args and env) and initialize stack base
 	 */
 	if (p->p_sysent->sv_copyout_strings)
 		stack_base = (*p->p_sysent->sv_copyout_strings)(imgp);
 	else
 		stack_base = exec_copyout_strings(imgp);
 
 	/*
 	 * If custom stack fixup routine present for this process
 	 * let it do the stack setup.
 	 * Else stuff argument count as first item on stack
 	 */
 	if (p->p_sysent->sv_fixup != NULL)
 		(*p->p_sysent->sv_fixup)(&stack_base, imgp);
 	else
 		suword(--stack_base, imgp->args->argc);
 
 	if (args->fdp != NULL) {
 		/* Install a brand new file descriptor table. */
 		fdinstall_remapped(td, args->fdp);
 		args->fdp = NULL;
 	} else {
 		/*
 		 * Keep on using the existing file descriptor table. For
 		 * security and other reasons, the file descriptor table
 		 * cannot be shared after an exec.
 		 */
 		fdunshare(td);
 		/* close files on exec */
 		fdcloseexec(td);
 	}
 
 	/*
 	 * Malloc things before we need locks.
 	 */
 	i = imgp->args->begin_envv - imgp->args->begin_argv;
 	/* Cache arguments if they fit inside our allowance */
 	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
 		newargs = pargs_alloc(i);
 		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
 	}
 
 	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 
 	/*
 	 * For security and other reasons, signal handlers cannot
 	 * be shared after an exec. The new process gets a copy of the old
 	 * handlers. In execsigs(), the new process will have its signals
 	 * reset.
 	 */
 	if (sigacts_shared(p->p_sigacts)) {
 		oldsigacts = p->p_sigacts;
 		newsigacts = sigacts_alloc();
 		sigacts_copy(newsigacts, oldsigacts);
 	} else {
 		oldsigacts = NULL;
 		newsigacts = NULL; /* satisfy gcc */
 	}
 
 	PROC_LOCK(p);
 	if (oldsigacts)
 		p->p_sigacts = newsigacts;
 	oldcred = p->p_ucred;
 	/* Stop profiling */
 	stopprofclock(p);
 
 	/* reset caught signals */
 	execsigs(p);
 
 	/* name this process - nameiexec(p, ndp) */
 	bzero(p->p_comm, sizeof(p->p_comm));
 	if (args->fname)
 		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
 		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
 	else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0)
 		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
 	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
 #ifdef KTR
 	sched_clear_tdname(td);
 #endif
 
 	/*
 	 * mark as execed, wakeup the process that vforked (if any) and tell
 	 * it that it now has its own resources back
 	 */
 	p->p_flag |= P_EXEC;
 	if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0)
 		p->p_flag2 &= ~P2_NOTRACE;
 	if (p->p_flag & P_PPWAIT) {
 		p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
 		cv_broadcast(&p->p_pwait);
 	}
 
 	/*
 	 * Implement image setuid/setgid.
 	 *
 	 * Don't honor setuid/setgid if the filesystem prohibits it or if
 	 * the process is being traced.
 	 *
 	 * We disable setuid/setgid/etc in compatibility mode on the basis
 	 * that most setugid applications are not written with that
 	 * environment in mind, and will therefore almost certainly operate
 	 * incorrectly. In principle there's no reason that setugid
 	 * applications might not be useful in capability mode, so we may want
 	 * to reconsider this conservative design choice in the future.
 	 *
 	 * XXXMAC: For the time being, use NOSUID to also prohibit
 	 * transitions on the file system.
 	 */
 	credential_changing = 0;
 	credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid !=
 	    attr.va_uid;
 	credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid !=
 	    attr.va_gid;
 #ifdef MAC
 	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
 	    interpvplabel, imgp);
 	credential_changing |= will_transition;
 #endif
 
 	if (credential_changing &&
 #ifdef CAPABILITY_MODE
 	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
 #endif
 	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
 	    (p->p_flag & P_TRACED) == 0) {
 		/*
 		 * Turn off syscall tracing for set-id programs, except for
 		 * root.  Record any set-id flags first to make sure that
 		 * we do not regain any tracing during a possible block.
 		 */
 		setsugid(p);
 
 #ifdef KTRACE
 		if (p->p_tracecred != NULL &&
 		    priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0))
 			ktrprocexec(p, &tracecred, &tracevp);
 #endif
 		/*
 		 * Close any file descriptors 0..2 that reference procfs,
 		 * then make sure file descriptors 0..2 are in use.
 		 *
 		 * Both fdsetugidsafety() and fdcheckstd() may call functions
 		 * taking sleepable locks, so temporarily drop our locks.
 		 */
 		PROC_UNLOCK(p);
 		VOP_UNLOCK(imgp->vp, 0);
 		fdsetugidsafety(td);
 		error = fdcheckstd(td);
 		/* Both locks are still dropped here; done1 retakes the vnode lock. */
 		if (error != 0)
 			goto done1;
 		newcred = crdup(oldcred);
 		euip = uifind(attr.va_uid);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		PROC_LOCK(p);
 		/*
 		 * Set the new credentials.
 		 */
 		if (attr.va_mode & S_ISUID)
 			change_euid(newcred, euip);
 		if (attr.va_mode & S_ISGID)
 			change_egid(newcred, attr.va_gid);
 #ifdef MAC
 		if (will_transition) {
 			mac_vnode_execve_transition(oldcred, newcred, imgp->vp,
 			    interpvplabel, imgp);
 		}
 #endif
 		/*
 		 * Implement correct POSIX saved-id behavior.
 		 *
 		 * XXXMAC: Note that the current logic will save the
 		 * uid and gid if a MAC domain transition occurs, even
 		 * though maybe it shouldn't.
 		 */
 		change_svuid(newcred, newcred->cr_uid);
 		change_svgid(newcred, newcred->cr_gid);
 		proc_set_cred(p, newcred);
 	} else {
 		if (oldcred->cr_uid == oldcred->cr_ruid &&
 		    oldcred->cr_gid == oldcred->cr_rgid)
 			p->p_flag &= ~P_SUGID;
 		/*
 		 * Implement correct POSIX saved-id behavior.
 		 *
 		 * XXX: It's not clear that the existing behavior is
 		 * POSIX-compliant.  A number of sources indicate that the
 		 * saved uid/gid should only be updated if the new ruid is
 		 * not equal to the old ruid, or the new euid is not equal
 		 * to the old euid and the new euid is not equal to the old
 		 * ruid.  The FreeBSD code always updates the saved uid/gid.
 		 * Also, this code uses the new (replaced) euid and egid as
 		 * the source, which may or may not be the right ones to use.
 		 */
 		if (oldcred->cr_svuid != oldcred->cr_uid ||
 		    oldcred->cr_svgid != oldcred->cr_gid) {
 			PROC_UNLOCK(p);
 			VOP_UNLOCK(imgp->vp, 0);
 			newcred = crdup(oldcred);
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 			PROC_LOCK(p);
 			change_svuid(newcred, newcred->cr_uid);
 			change_svgid(newcred, newcred->cr_gid);
 			proc_set_cred(p, newcred);
 		}
 	}
 
 	/*
 	 * Store the vp for use in procfs.  This vnode was referenced by namei
 	 * or fgetvp_exec.
 	 */
 	oldtextvp = p->p_textvp;
 	p->p_textvp = newtextvp;
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the exec if it
 	 * has declared an interest.
 	 */
 	if (dtrace_fasttrap_exec)
 		dtrace_fasttrap_exec(p);
 #endif
 
 	/*
 	 * Notify others that we exec'd, and clear the P_INEXEC flag
 	 * as we're now a bona fide freshly-execed process.
 	 */
 	KNOTE_LOCKED(&p->p_klist, NOTE_EXEC);
 	p->p_flag &= ~P_INEXEC;
 
 	/* clear "fork but no exec" flag, as we _are_ execing */
 	p->p_acflag &= ~AFORK;
 
 	/*
 	 * Free any previous argument cache and replace it with
 	 * the new argument cache, if any.
 	 */
 	oldargs = p->p_args;
 	p->p_args = newargs;
 	newargs = NULL;
 
 #ifdef	HWPMC_HOOKS
 	/*
 	 * Check if system-wide sampling is in effect or if the
 	 * current process is using PMCs.  If so, do exec() time
 	 * processing.  This processing needs to happen AFTER the
 	 * P_INEXEC flag is cleared.
 	 *
 	 * The proc lock needs to be released before taking the PMC
 	 * SX.
 	 */
 	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
 		PROC_UNLOCK(p);
 		VOP_UNLOCK(imgp->vp, 0);
 		pe.pm_credentialschanged = credential_changing;
 		pe.pm_entryaddr = imgp->entry_addr;
 
 		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	} else
 		PROC_UNLOCK(p);
 #else  /* !HWPMC_HOOKS */
 	PROC_UNLOCK(p);
 #endif
 
 	/* Set values passed into the program in registers. */
 	if (p->p_sysent->sv_setregs)
 		(*p->p_sysent->sv_setregs)(td, imgp, 
 		    (u_long)(uintptr_t)stack_base);
 	else
 		exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base);
 
 	vfs_mark_atime(imgp->vp, td->td_ucred);
 
-	SDT_PROBE1(proc, kernel, , exec__success, args->fname);
+	SDT_PROBE1(proc, , , exec__success, args->fname);
 
 	VOP_UNLOCK(imgp->vp, 0);
 	/*
 	 * Reached by falling through on success, and by the jump taken when
 	 * fdcheckstd() fails in the set-id path above.
 	 */
 done1:
 	/*
 	 * Free any resources malloc'd earlier that we didn't use.
 	 */
 	if (euip != NULL)
 		uifree(euip);
 	/* newcred was installed with proc_set_cred(); release the old one. */
 	if (newcred != NULL)
 		crfree(oldcred);
 
 	/*
 	 * Handle deferred decrement of ref counts.
 	 */
 	if (oldtextvp != NULL)
 		vrele(oldtextvp);
 #ifdef KTRACE
 	if (tracevp != NULL)
 		vrele(tracevp);
 	if (tracecred != NULL)
 		crfree(tracecred);
 #endif
 	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	pargs_drop(oldargs);
 	pargs_drop(newargs);
 	if (oldsigacts != NULL)
 		sigacts_free(oldsigacts);
 
 	/* Common cleanup; entered with imgp->vp locked. */
 exec_fail_dealloc:
 
 	/*
 	 * free various allocated resources
 	 */
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	if (imgp->vp != NULL) {
 		if (args->fname)
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (imgp->opened)
 			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
 		/*
 		 * On failure unlock and release the vnode; on success it is
 		 * now referenced from p->p_textvp, so only unlock it.
 		 */
 		if (error != 0)
 			vput(imgp->vp);
 		else
 			VOP_UNLOCK(imgp->vp, 0);
 	}
 
 	if (imgp->object != NULL)
 		vm_object_deallocate(imgp->object);
 
 	free(imgp->freepath, M_TEMP);
 
 	if (error == 0) {
 		PROC_LOCK(p);
 		td->td_dbgflags |= TDB_EXEC;
 		PROC_UNLOCK(p);
 
 		/*
 		 * Stop the process here if its stop event mask has
 		 * the S_EXEC bit set.
 		 */
 		STOPEVENT(p, S_EXEC, 0);
 		goto done2;
 	}
 
 exec_fail:
 	/* we're done here, clear P_INEXEC */
 	PROC_LOCK(p);
 	p->p_flag &= ~P_INEXEC;
 	PROC_UNLOCK(p);
 
-	SDT_PROBE1(proc, kernel, , exec__failure, error);
+	SDT_PROBE1(proc, , , exec__failure, error);
 
 done2:
 #ifdef MAC
 	mac_execve_exit(imgp);
 	mac_execve_interpreter_exit(interpvplabel);
 #endif
 	exec_free_args(args);
 
 	if (error && imgp->vmspace_destroyed) {
 		/* sorry, no more process anymore. exit gracefully */
 		exit1(td, 0, SIGABRT);
 		/* NOT REACHED */
 	}
 
 #ifdef KTRACE
 	if (error == 0)
 		ktrprocctor(p);
 #endif
 
 	return (error);
 }
 
 /*
  * Map page 0 of the image's VM object into kernel address space so that
  * the image activators can inspect the file header.
  *
  * On success imgp->firstpage holds the sf_buf for the page,
  * imgp->image_header points at its kernel mapping and 0 is returned.
  * Returns EACCES if the vnode has no VM object, and EIO if the pager has
  * no backing for page 0 or the page-in fails.  Any mapping left over from
  * a previous call is released first.
  */
 int
 exec_map_first_page(struct image_params *imgp)
 {
 	int rv, i, after, initial_pagein;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
 	vm_object_t object;
 
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	object = imgp->vp->v_object;
 	if (object == NULL)
 		return (EACCES);
 	VM_OBJECT_WLOCK(object);
 #if VM_NRESERVLEVEL > 0
 	vm_object_color(object, 0);
 #endif
 	ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL);
 	if (ma[0]->valid != VM_PAGE_BITS_ALL) {
 		if (!vm_pager_has_page(object, 0, NULL, &after)) {
 			/*
 			 * vm_page_free() disposes of the page, its busy
 			 * state included; unbusying it afterwards would
 			 * operate on a page we no longer own.
 			 */
 			vm_page_lock(ma[0]);
 			vm_page_free(ma[0]);
 			vm_page_unlock(ma[0]);
 			VM_OBJECT_WUNLOCK(object);
 			return (EIO);
 		}
 		/* Gather up to VM_INITIAL_PAGEIN pages for a single page-in. */
 		initial_pagein = min(after, VM_INITIAL_PAGEIN);
 		KASSERT(initial_pagein <= object->size,
 		    ("%s: initial_pagein %d object->size %ju",
 		    __func__, initial_pagein, (uintmax_t )object->size));
 		for (i = 1; i < initial_pagein; i++) {
 			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
 				if (ma[i]->valid)
 					break;
 				/*
 				 * NOTE(review): if vm_page_tryxbusy()
 				 * returns non-zero on success this test
 				 * looks inverted (the run stops at a page
 				 * we just busied and continues past one we
 				 * could not) -- confirm against vm_page.h.
 				 */
 				if (vm_page_tryxbusy(ma[i]))
 					break;
 			} else {
 				ma[i] = vm_page_alloc(object, i,
 				    VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
 				if (ma[i] == NULL)
 					break;
 			}
 		}
 		initial_pagein = i;
 		rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL);
 		if (rv != VM_PAGER_OK) {
 			/* As above: free only, no unbusy after the free. */
 			for (i = 0; i < initial_pagein; i++) {
 				vm_page_lock(ma[i]);
 				vm_page_free(ma[i]);
 				vm_page_unlock(ma[i]);
 			}
 			VM_OBJECT_WUNLOCK(object);
 			return (EIO);
 		}
 		for (i = 1; i < initial_pagein; i++)
 			vm_page_readahead_finish(ma[i]);
 	}
 	vm_page_xunbusy(ma[0]);
 	/* Hold the page so it stays put while the sf_buf maps it. */
 	vm_page_lock(ma[0]);
 	vm_page_hold(ma[0]);
 	vm_page_activate(ma[0]);
 	vm_page_unlock(ma[0]);
 	VM_OBJECT_WUNLOCK(object);
 
 	imgp->firstpage = sf_buf_alloc(ma[0], 0);
 	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);
 
 	return (0);
 }
 
 /*
  * Undo exec_map_first_page(): free the sf_buf mapping and drop the hold
  * on the underlying page.  Does nothing when no page is mapped.
  */
 void
 exec_unmap_first_page(struct image_params *imgp)
 {
 	vm_page_t pg;
 
 	if (imgp->firstpage == NULL)
 		return;
 
 	pg = sf_buf_page(imgp->firstpage);
 	sf_buf_free(imgp->firstpage);
 	imgp->firstpage = NULL;
 
 	vm_page_lock(pg);
 	vm_page_unhold(pg);
 	vm_page_unlock(pg);
 }
 
 /*
  * Destroy old address space, and allocate a new stack
  *	The new stack is only SGROWSIZ large because it is grown
  *	automatically in trap.c.
  *
  * 'sv' is the sysentvec of the image being activated; it is recorded in
  * imgp->sysent and supplies the address-space bounds, the optional shared
  * page and the stack location.  Returns 0 on success, otherwise the value
  * returned by the failing vmspace_exec(), vm_map_fixed() or
  * vm_map_stack() call.
  *
  * NOTE(review): the vm_map_*() results are passed back unchanged --
  * confirm they are in the error space the callers expect.
  */
 int
 exec_new_vmspace(imgp, sv)
 	struct image_params *imgp;
 	struct sysentvec *sv;
 {
 	int error;
 	struct proc *p = imgp->proc;
 	struct vmspace *vmspace = p->p_vmspace;
 	vm_object_t obj;
 	struct rlimit rlim_stack;
 	vm_offset_t sv_minuser, stack_addr;
 	vm_map_t map;
 	u_long ssiz;
 
 	/*
 	 * Set before anything is torn down; do_execve() calls exit1() when
 	 * it fails with this flag set.
 	 */
 	imgp->vmspace_destroyed = 1;
 	imgp->sysent = sv;
 
 	/* May be called with Giant held */
 	EVENTHANDLER_INVOKE(process_exec, p, imgp);
 
 	/*
 	 * Blow away entire process VM, if address space not shared,
 	 * otherwise, create a new VM space so that other threads are
 	 * not disrupted
 	 */
 	map = &vmspace->vm_map;
 	if (map_at_zero)
 		sv_minuser = sv->sv_minuser;
 	else
 		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
 	/*
 	 * The existing vmspace is reused only when we hold the sole
 	 * reference and its bounds already match the new ABI.
 	 */
 	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
 	    vm_map_max(map) == sv->sv_maxuser) {
 		shmexit(vmspace);
 		pmap_remove_pages(vmspace_pmap(vmspace));
 		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
 	} else {
 		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
 		if (error)
 			return (error);
 		/* vmspace_exec() replaced p->p_vmspace; pick up the new one. */
 		vmspace = p->p_vmspace;
 		map = &vmspace->vm_map;
 	}
 
 	/* Map a shared page */
 	obj = sv->sv_shared_page_obj;
 	if (obj != NULL) {
 		vm_object_reference(obj);
 		error = vm_map_fixed(map, obj, 0,
 		    sv->sv_shared_page_base, sv->sv_shared_page_len,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
 		if (error) {
 			/* Drop the reference taken just above. */
 			vm_object_deallocate(obj);
 			return (error);
 		}
 	}
 
 	/* Allocate a new stack */
 	if (imgp->stack_sz != 0) {
 		/*
 		 * The image asked for a specific stack size: clamp it to
 		 * the hard limit and raise the soft limit if it is lower.
 		 */
 		ssiz = trunc_page(imgp->stack_sz);
 		PROC_LOCK(p);
 		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
 		PROC_UNLOCK(p);
 		if (ssiz > rlim_stack.rlim_max)
 			ssiz = rlim_stack.rlim_max;
 		if (ssiz > rlim_stack.rlim_cur) {
 			rlim_stack.rlim_cur = ssiz;
 			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
 		}
 	} else if (sv->sv_maxssiz != NULL) {
 		ssiz = *sv->sv_maxssiz;
 	} else {
 		ssiz = maxssiz;
 	}
 	stack_addr = sv->sv_usrstack - ssiz;
 	/*
 	 * The image's own stack protection is honoured only when it is
 	 * non-zero and a shared page object exists; otherwise the ABI
 	 * default sv_stackprot is used.
 	 */
 	error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz,
 	    obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
 		sv->sv_stackprot,
 	    VM_PROT_ALL, MAP_STACK_GROWS_DOWN);
 	if (error)
 		return (error);
 
 	/*
 	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
 	 * are still used to enforce the stack rlimit on the process stack.
 	 */
 	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
 	vmspace->vm_maxsaddr = (char *)stack_addr;
 
 	return (0);
 }
 
 /*
  * Copy out argument and environment strings from the old process address
  * space into the temporary string buffer.
  */
 int
 exec_copyin_args(struct image_args *args, char *fname,
     enum uio_seg segflg, char **argv, char **envv)
 {
 	u_long argp, envp;
 	int error;
 	size_t length;
 
 	bzero(args, sizeof(*args));
 	if (argv == NULL)
 		return (EFAULT);
 
 	/*
 	 * Allocate demand-paged memory for the file name, argument, and
 	 * environment strings.
 	 */
 	error = exec_alloc_args(args);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Copy the file name.
 	 */
 	if (fname != NULL) {
 		args->fname = args->buf;
 		error = (segflg == UIO_SYSSPACE) ?
 		    copystr(fname, args->fname, PATH_MAX, &length) :
 		    copyinstr(fname, args->fname, PATH_MAX, &length);
 		if (error != 0)
 			goto err_exit;
 	} else
 		length = 0;
 
 	args->begin_argv = args->buf + length;
 	args->endp = args->begin_argv;
 	args->stringspace = ARG_MAX;
 
 	/*
 	 * extract arguments first
 	 */
 	for (;;) {
 		error = fueword(argv++, &argp);
 		if (error == -1) {
 			error = EFAULT;
 			goto err_exit;
 		}
 		if (argp == 0)
 			break;
 		error = copyinstr((void *)(uintptr_t)argp, args->endp,
 		    args->stringspace, &length);
 		if (error != 0) {
 			if (error == ENAMETOOLONG) 
 				error = E2BIG;
 			goto err_exit;
 		}
 		args->stringspace -= length;
 		args->endp += length;
 		args->argc++;
 	}
 
 	args->begin_envv = args->endp;
 
 	/*
 	 * extract environment strings
 	 */
 	if (envv) {
 		for (;;) {
 			error = fueword(envv++, &envp);
 			if (error == -1) {
 				error = EFAULT;
 				goto err_exit;
 			}
 			if (envp == 0)
 				break;
 			error = copyinstr((void *)(uintptr_t)envp,
 			    args->endp, args->stringspace, &length);
 			if (error != 0) {
 				if (error == ENAMETOOLONG)
 					error = E2BIG;
 				goto err_exit;
 			}
 			args->stringspace -= length;
 			args->endp += length;
 			args->envc++;
 		}
 	}
 
 	return (0);
 
 err_exit:
 	exec_free_args(args);
 	return (error);
 }
 
 /*
  * Build 'args' from a single user data buffer and a user array of file
  * descriptor numbers.  The data buffer becomes the argument string area
  * (argc is the number of null bytes in it, including the terminator that
  * is appended), there is no environment, and args->fdp receives a file
  * descriptor table produced by fdcopy_remapped().
  *
  * Returns E2BIG if 'datalen' or 'fdslen' is too large, otherwise 0 or
  * the error of the failing step; on failure 'args' is released again.
  */
 int
 exec_copyin_data_fds(struct thread *td, struct image_args *args,
     const void *data, size_t datalen, const int *fds, size_t fdslen)
 {
 	struct filedesc *curfdp;
 	const char *cp;
 	int *fdmap;
 	int rv;
 
 	memset(args, '\0', sizeof(*args));
 	curfdp = td->td_proc->p_fd;
 	if (datalen >= ARG_MAX)
 		return (E2BIG);
 	if (fdslen > curfdp->fd_lastfile + 1)
 		return (E2BIG);
 	rv = exec_alloc_args(args);
 	if (rv != 0)
 		return (rv);
 
 	args->begin_argv = args->buf;
 	args->stringspace = ARG_MAX;
 
 	if (datalen > 0) {
 		/*
 		 * An argument buffer was supplied: bring it into the
 		 * kernel as one string and terminate it with a null byte.
 		 */
 		rv = copyin(data, args->begin_argv, datalen);
 		if (rv != 0)
 			goto fail;
 		args->begin_argv[datalen] = '\0';
 		args->endp = args->begin_argv + datalen + 1;
 		args->stringspace -= datalen + 1;
 
 		/* Traditional argument counting: one per null byte. */
 		for (cp = args->begin_argv; cp < args->endp; cp++) {
 			if (*cp == '\0')
 				args->argc++;
 		}
 	} else {
 		/* No argument buffer was supplied. */
 		args->endp = args->begin_argv;
 	}
 	/* The environment is always empty. */
 	args->begin_envv = args->endp;
 
 	/* Build the new file descriptor table from the user's fd list. */
 	fdmap = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK);
 	rv = copyin(fds, fdmap, fdslen * sizeof(int));
 	if (rv == 0)
 		rv = fdcopy_remapped(curfdp, fdmap, fdslen, &args->fdp);
 	free(fdmap, M_TEMP);
 	if (rv != 0)
 		goto fail;
 
 	return (0);
 fail:
 	exec_free_args(args);
 	return (rv);
 }
 
 /*
  * Allocate the temporary demand-paged, zero-filled buffer that holds the
  * file name, argument and environment strings (PATH_MAX + ARG_MAX bytes
  * from exec_map).  Returns 0 when the allocation succeeds and ENOMEM
  * otherwise.
  */
 int
 exec_alloc_args(struct image_args *args)
 {
 
 	args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
 	if (args->buf == NULL)
 		return (ENOMEM);
 	return (0);
 }
 
 /*
  * Release everything attached to 'args': the string buffer obtained by
  * exec_alloc_args(), the fname_buf allocation and the remapped file
  * descriptor table.  Each pointer is cleared once its resource has been
  * released, so calling this again on the same structure is harmless.
  */
 void
 exec_free_args(struct image_args *args)
 {
 
 	if (args->buf != NULL) {
 		kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
 		    PATH_MAX + ARG_MAX);
 		args->buf = NULL;
 	}
 	if (args->fname_buf != NULL) {
 		free(args->fname_buf, M_TEMP);
 		args->fname_buf = NULL;
 	}
 	if (args->fdp != NULL) {
 		fdescfree_remapped(args->fdp);
 		/* Clear it like the pointers above to avoid a double free. */
 		args->fdp = NULL;
 	}
 }
 
 /*
  * Copy strings out to the new process address space, constructing new arg
  * and env vector tables. Return a pointer to the base so that it can be used
  * as the initial stack pointer.
  *
  * Working downwards from the ps_strings structure at sv_psstrings, the
  * following are placed in this order: the signal trampoline (only when
  * the ABI has no sv_sigcode_base), the executable path, the SSP canary,
  * the pagesizes array, the argument/environment strings and finally the
  * pointer vectors.  The user addresses of the path, canary and pagesizes
  * are recorded in imgp.
  *
  * NOTE(review): the results of copyout()/suword()/suword32() are not
  * checked anywhere in this function.
  */
 register_t *
 exec_copyout_strings(imgp)
 	struct image_params *imgp;
 {
 	int argc, envc;
 	char **vectp;
 	char *stringp;
 	uintptr_t destp;
 	register_t *stack_base;
 	struct ps_strings *arginfo;
 	struct proc *p;
 	size_t execpath_len;
 	int szsigcode, szps;
 	char canary[sizeof(long) * 8];
 
 	szps = sizeof(pagesizes[0]) * MAXPAGESIZES;
 	/*
 	 * Calculate string base and vector table pointers.
 	 * Also deal with signal trampoline code for this exec type.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
 		execpath_len = strlen(imgp->execpath) + 1;
 	else
 		execpath_len = 0;
 	p = imgp->proc;
 	szsigcode = 0;
 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
 	if (p->p_sysent->sv_sigcode_base == 0) {
 		if (p->p_sysent->sv_szsigcode != NULL)
 			szsigcode = *(p->p_sysent->sv_szsigcode);
 	}
 	/* destp walks downwards from ps_strings as each item is placed. */
 	destp =	(uintptr_t)arginfo;
 
 	/*
 	 * install sigcode
 	 */
 	if (szsigcode != 0) {
 		destp -= szsigcode;
 		destp = rounddown2(destp, sizeof(void *));
 		copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode);
 	}
 
 	/*
 	 * Copy the image path for the rtld.
 	 */
 	if (execpath_len != 0) {
 		destp -= execpath_len;
 		imgp->execpathp = destp;
 		copyout(imgp->execpath, (void *)destp, execpath_len);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= sizeof(canary);
 	imgp->canary = destp;
 	copyout(canary, (void *)destp, sizeof(canary));
 	imgp->canarylen = sizeof(canary);
 
 	/*
 	 * Prepare the pagesizes array.
 	 */
 	destp -= szps;
 	destp = rounddown2(destp, sizeof(void *));
 	imgp->pagesizes = destp;
 	copyout(pagesizes, (void *)destp, szps);
 	imgp->pagesizeslen = szps;
 
 	/* Reserve room for the strings: ARG_MAX minus what is still unused. */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(void *));
 
 	/*
 	 * If we have a valid auxargs ptr, prepare some room
 	 * on the stack.
 	 */
 	if (imgp->auxargs) {
 		/*
 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
 		 * lower compatibility.
 		 */
 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
 		    (AT_COUNT * 2);
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets,and imgp->auxarg_size is room
 		 * for argument of Runtime loader.
 		 */
 		vectp = (char **)(destp - (imgp->args->argc +
 		    imgp->args->envc + 2 + imgp->auxarg_size)
 		    * sizeof(char *));
 	} else {
 		/*
 		 * The '+ 2' is for the null pointers at the end of each of
 		 * the arg and env vector sets
 		 */
 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc
 		    + 2) * sizeof(char *));
 	}
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	stack_base = (register_t *)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
 	suword32(&arginfo->ps_nargvstr, argc);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		suword(vectp++, (long)(intptr_t)destp);
 		/*
 		 * Step the user address (destp) and the kernel copy
 		 * (stringp) past this string and its terminating null.
 		 */
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	suword(vectp++, 0);
 
 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
 	suword32(&arginfo->ps_nenvstr, envc);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		suword(vectp++, (long)(intptr_t)destp);
 		while (*stringp++ != 0)
 			destp++;
 		destp++;
 	}
 
 	/* end of vector table is a null pointer */
 	suword(vectp, 0);
 
 	return (stack_base);
 }
 
 /*
  * Decide whether the current thread may execute the file at imgp->vp and,
  * if so, open it.
  *	Must be called with imgp->vp locked.
  *	Fills in *imgp->attr and sets imgp->opened once VOP_OPEN() succeeds.
  *	Returns 0 on success or an errno value on failure.
  */
 int
 exec_check_permissions(struct image_params *imgp)
 {
 	struct thread *td = curthread;
 	struct vnode *vp = imgp->vp;
 	struct vattr *vap = imgp->attr;
 	int nwriters, rv;
 
 	/* Fetch the file attributes. */
 	rv = VOP_GETATTR(vp, vap, td->td_ucred);
 	if (rv != 0)
 		return (rv);
 
 #ifdef MAC
 	rv = mac_vnode_check_exec(td->td_ucred, vp, imgp);
 	if (rv != 0)
 		return (rv);
 #endif
 
 	/* Execution must not be disabled for the file's filesystem. */
 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) != 0)
 		return (EACCES);
 	/*
 	 * At least one execute bit must be on.  Otherwise a privileged
 	 * user would always succeed, and we don't want that unless the
 	 * file really is executable.
 	 */
 	if ((vap->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
 		return (EACCES);
 	/* Only regular files can be executed. */
 	if (vap->va_type != VREG)
 		return (EACCES);
 
 	/* Neither can zero length files. */
 	if (vap->va_size == 0)
 		return (ENOEXEC);
 
 	/* Execute permission is checked against the current credentials. */
 	rv = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
 	if (rv != 0)
 		return (rv);
 
 	/* Refuse to run a file that anybody has open for writing. */
 	rv = VOP_GET_WRITECOUNT(vp, &nwriters);
 	if (rv != 0)
 		return (rv);
 	if (nwriters != 0)
 		return (ETXTBSY);
 
 	/*
 	 * Finally call the filesystem specific open routine (which does
 	 * nothing in the general case).
 	 */
 	rv = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
 	if (rv == 0)
 		imgp->opened = 1;
 	return (rv);
 }
 
 /*
  * Exec handler registration
  *
  * Append 'execsw_arg' to the NULL-terminated execsw table by building a
  * new table one slot larger and swapping it in.  Returns 0, or ENOMEM if
  * the new table cannot be allocated.
  */
 int
 exec_register(const struct execsw *execsw_arg)
 {
 	const struct execsw **src, **dst, **table;
 	int nslots;
 
 	/* One slot for the new entry and one for the trailing NULL. */
 	nslots = 2;
 	if (execsw != NULL) {
 		for (src = execsw; *src != NULL; src++)
 			nslots++;
 	}
 
 	table = malloc(nslots * sizeof(*src), M_TEMP, M_WAITOK);
 	if (table == NULL)
 		return (ENOMEM);
 
 	/* Existing handlers keep their order; the new one goes last. */
 	dst = table;
 	if (execsw != NULL) {
 		for (src = execsw; *src != NULL; src++)
 			*dst++ = *src;
 	}
 	*dst++ = execsw_arg;
 	*dst = NULL;
 
 	if (execsw != NULL)
 		free(execsw, M_TEMP);
 	execsw = table;
 	return (0);
 }
 
 /*
  * Remove 'execsw_arg' from the execsw table by building a new table
  * without it and swapping it in.  Panics if no table exists.  Returns 0,
  * ENOENT if the handler is not registered, or ENOMEM if the new table
  * cannot be allocated.
  */
 int
 exec_unregister(const struct execsw *execsw_arg)
 {
 	const struct execsw **src, **dst, **table;
 	int nslots;
 
 	if (execsw == NULL)
 		panic("unregister with no handlers left?\n");
 
 	/* The handler has to be registered to begin with. */
 	src = execsw;
 	while (*src != NULL && *src != execsw_arg)
 		src++;
 	if (*src == NULL)
 		return (ENOENT);
 
 	/* One slot per surviving handler plus the trailing NULL. */
 	nslots = 1;
 	for (src = execsw; *src != NULL; src++) {
 		if (*src != execsw_arg)
 			nslots++;
 	}
 
 	table = malloc(nslots * sizeof(*src), M_TEMP, M_WAITOK);
 	if (table == NULL)
 		return (ENOMEM);
 
 	dst = table;
 	for (src = execsw; *src != NULL; src++) {
 		if (*src != execsw_arg)
 			*dst++ = *src;
 	}
 	*dst = NULL;
 
 	if (execsw != NULL)
 		free(execsw, M_TEMP);
 	execsw = table;
 	return (0);
 }
Index: head/sys/kern/kern_exit.c
===================================================================
--- head/sys/kern/kern_exit.c	(revision 292383)
+++ head/sys/kern/kern_exit.c	(revision 292384)
@@ -1,1379 +1,1379 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_exit.c	8.7 (Berkeley) 2/12/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/capsicum.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/procdesc.h>
 #include <sys/pioctl.h>
 #include <sys/jail.h>
 #include <sys/tty.h>
 #include <sys/wait.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sbuf.h>
 #include <sys/signalvar.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/syslog.h>
 #include <sys/ptrace.h>
 #include <sys/acct.h>		/* for acct_process() function prototype */
 #include <sys/filedesc.h>
 #include <sys/sdt.h>
 #include <sys/shm.h>
 #include <sys/sem.h>
 #include <sys/umtx.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 #include <vm/vm_domain.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 dtrace_execexit_func_t	dtrace_fasttrap_exit;
 #endif
 
 SDT_PROVIDER_DECLARE(proc);
-SDT_PROBE_DEFINE1(proc, kernel, , exit, "int");
+SDT_PROBE_DEFINE1(proc, , , exit, "int");
 
 /* Hook for NFS teardown procedure. */
 void (*nlminfo_release_p)(struct proc *p);
 
 /*
  * Return the real parent of child.  For a process that is not on an
  * orphan list this is its current parent, unless p_oppid records a
  * different original parent pid, in which case initproc is returned.
  * For an orphan, walk back along the orphan list to its first entry and
  * recover the process that owns the list head.
  * The caller must hold proctree_lock.
  */
 struct proc *
 proc_realparent(struct proc *child)
 {
 	struct proc *cur;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	if ((child->p_treeflag & P_TREE_ORPHANED) == 0) {
 		if (child->p_oppid != 0 &&
 		    child->p_pptr->p_pid != child->p_oppid)
 			return (initproc);
 		return (child->p_pptr);
 	}
 
 	cur = child;
 	while ((cur->p_treeflag & P_TREE_FIRST_ORPHAN) == 0) {
 		/* Cannot use LIST_PREV(), since the list head is not known. */
 		cur = __containerof(cur->p_orphan.le_prev, struct proc,
 		    p_orphan.le_next);
 		KASSERT((cur->p_treeflag & P_TREE_ORPHANED) != 0,
 		    ("missing P_ORPHAN %p", cur));
 	}
 	/* le_prev of the first orphan points at the owner's list head. */
 	return (__containerof(cur->p_orphan.le_prev, struct proc,
 	    p_orphans.lh_first));
 }
 
 /*
  * If p is a reaper, hand every process on its reap list over to p's own
  * reaper and drop p's reaper status.  When exiting is true, processes
  * whose parent is p are also reparented to the new reaper.
  * The caller must hold proctree_lock; p must not be initproc.
  */
 void
 reaper_abandon_children(struct proc *p, bool exiting)
 {
 	struct proc *heir, *kid, *nextkid;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	KASSERT(p != initproc, ("reaper_abandon_children for initproc"));
 	if ((p->p_treeflag & P_TREE_REAPER) != 0) {
 		heir = p->p_reaper;
 		LIST_FOREACH_SAFE(kid, &p->p_reaplist, p_reapsibling,
 		    nextkid) {
 			LIST_REMOVE(kid, p_reapsibling);
 			kid->p_reapsubtree = p->p_reapsubtree;
 			kid->p_reaper = heir;
 			LIST_INSERT_HEAD(&heir->p_reaplist, kid,
 			    p_reapsibling);
 			if (!exiting || kid->p_pptr != p)
 				continue;
 			PROC_LOCK(kid);
 			proc_reparent(kid, heir);
 			PROC_UNLOCK(kid);
 		}
 		KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty"));
 		p->p_treeflag &= ~P_TREE_REAPER;
 	}
 }
 
 /*
  * Take p off the orphan list it is linked on, if any.  When p is the
  * first entry, the first-orphan mark is passed on to its successor.
  * The caller must hold proctree_lock exclusively.
  */
 static void
 clear_orphan(struct proc *p)
 {
 	struct proc *succ;
 
 	sx_assert(&proctree_lock, SA_XLOCKED);
 	if ((p->p_treeflag & P_TREE_ORPHANED) != 0) {
 		if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) {
 			succ = LIST_NEXT(p, p_orphan);
 			if (succ != NULL)
 				succ->p_treeflag |= P_TREE_FIRST_ORPHAN;
 		}
 		LIST_REMOVE(p, p_orphan);
 		/* Clearing an unset P_TREE_FIRST_ORPHAN is harmless. */
 		p->p_treeflag &= ~(P_TREE_FIRST_ORPHAN | P_TREE_ORPHANED);
 	}
 }
 
 /*
  * exit -- death of process.
  *
  * System call entry point: passes the caller-supplied status (uap->rval)
  * to exit1() with a signal number of 0.  Control is not expected to come
  * back from exit1(), hence the NOTREACHED marker and the void return.
  */
 void
 sys_sys_exit(struct thread *td, struct sys_exit_args *uap)
 {
 
 	exit1(td, uap->rval, 0);
 	/* NOTREACHED */
 }
 
 /*
  * Exit: deallocate address space and other resources, change proc state to
  * zombie, and unlink proc from allproc and parent's lists.  Save exit status
  * and rusage for wait().  Check for child processes and orphan them.
  *
  * td is the exiting thread.  rval is the exit status and signo the
  * terminating signal; they are stored in p_xexit and p_xsig respectively,
  * and at most one of them may be non-zero (asserted below).  Giant must
  * not be held on entry.  The function ends in thread_exit().
  */
 void
 exit1(struct thread *td, int rval, int signo)
 {
 	struct proc *p, *nq, *q, *t;
 	struct thread *tdt;
 	struct vnode *ttyvp = NULL;
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
 
 	p = td->td_proc;
 	/*
 	 * XXX in case we're rebooting we just let init die in order to
 	 * work around an unsolved stack overflow seen very late during
 	 * shutdown on sparc64 when the gmirror worker process exits.
 	 */
 	if (p == initproc && rebooting == 0) {
 		printf("init died (signal %d, exit %d)\n", signo, rval);
 		panic("Going nowhere without my init!");
 	}
 
 	/*
 	 * Deref SU mp, since the thread does not return to userspace.
 	 */
 	if (softdep_ast_cleanup != NULL)
 		softdep_ast_cleanup();
 
 	/*
 	 * MUST abort all other threads before proceeding past here.
 	 */
 	PROC_LOCK(p);
 	/*
 	 * First check if some other thread or external request got
 	 * here before us.  If so, act appropriately: exit or suspend.
 	 * We must ensure that stop requests are handled before we set
 	 * P_WEXIT.
 	 */
 	thread_suspend_check(0);
 	while (p->p_flag & P_HADTHREADS) {
 		/*
 		 * Kill off the other threads. This requires
 		 * some co-operation from other parts of the kernel
 		 * so it may not be instantaneous.  With this state set
 		 * any thread entering the kernel from userspace will
 		 * thread_exit() in trap().  Any thread attempting to
 		 * sleep will return immediately with EINTR or EWOULDBLOCK
 		 * which will hopefully force them to back out to userland
 		 * freeing resources as they go.  Any thread attempting
 		 * to return to userland will thread_exit() from userret().
 		 * thread_exit() will unsuspend us when the last of the
 		 * other threads exits.
 		 * If there is already a thread singler after resumption,
 		 * calling thread_single will fail; in that case, we just
 		 * re-check all suspension request, the thread should
 		 * either be suspended there or exit.
 		 */
 		if (!thread_single(p, SINGLE_EXIT))
 			/*
 			 * All other activity in this process is now
 			 * stopped.  Threading support has been turned
 			 * off.
 			 */
 			break;
 		/*
 		 * Recheck for new stop or suspend requests which
 		 * might appear while process lock was dropped in
 		 * thread_single().
 		 */
 		thread_suspend_check(0);
 	}
 	KASSERT(p->p_numthreads == 1,
 	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
 	racct_sub(p, RACCT_NTHR, 1);
 
 	/* Let event handler change exit status */
 	p->p_xexit = rval;
 	p->p_xsig = signo;
 
 	/*
 	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
 	 * on our vmspace, so we should block below until they have
 	 * released their reference to us.  Note that if they have
 	 * requested S_EXIT stops we will block here until they ack
 	 * via PIOCCONT.
 	 */
 	_STOPEVENT(p, S_EXIT, 0);
 
 	/*
 	 * Ignore any pending request to stop due to a stop signal.
 	 * Once P_WEXIT is set, future requests will be ignored as
 	 * well.
 	 */
 	p->p_flag &= ~P_STOPPED_SIG;
 	KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped"));
 
 	/*
 	 * Note that we are exiting and do another wakeup of anyone in
 	 * PIOCWAIT in case they aren't listening for S_EXIT stops or
 	 * decided to wait again after we told them we are exiting.
 	 */
 	p->p_flag |= P_WEXIT;
 	wakeup(&p->p_stype);
 
 	/*
 	 * Wait for any processes that have a hold on our vmspace to
 	 * release their reference.
 	 */
 	while (p->p_lock > 0)
 		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);
 
 	PROC_UNLOCK(p);
 	/* Drain the limit callout while we don't have the proc locked */
 	callout_drain(&p->p_limco);
 
 #ifdef AUDIT
 	/*
 	 * The Sun BSM exit token contains two components: an exit status as
 	 * passed to exit(), and a return value to indicate what sort of exit
 	 * it was.  The exit status is WEXITSTATUS(rv), but it's not clear
 	 * what the return value is.
 	 */
 	AUDIT_ARG_EXIT(rval, 0);
 	AUDIT_SYSCALL_EXIT(0, td);
 #endif
 
 	/* Are we a task leader with peers? */
 	if (p->p_peers != NULL && p == p->p_leader) {
 		mtx_lock(&ppeers_lock);
 		q = p->p_peers;
 		while (q != NULL) {
 			PROC_LOCK(q);
 			kern_psignal(q, SIGKILL);
 			PROC_UNLOCK(q);
 			q = q->p_peers;
 		}
 		/* Sleep until every peer has unlinked itself from the list. */
 		while (p->p_peers != NULL)
 			msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
 		mtx_unlock(&ppeers_lock);
 	}
 
 	/*
 	 * Check if any loadable modules need anything done at process exit.
 	 * E.g. SYSV IPC stuff.
 	 * Event handler could change exit status.
 	 * XXX what if one of these generates an error?
 	 */
 	EVENTHANDLER_INVOKE(process_exit, p);
 
 	/*
 	 * If parent is waiting for us to exit or exec,
 	 * P_PPWAIT is set; we will wakeup the parent below.
 	 */
 	PROC_LOCK(p);
 	stopprofclock(p);
 	p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE);
 
 	/*
 	 * Stop the real interval timer.  If the handler is currently
 	 * executing, prevent it from rearming itself and let it finish.
 	 */
 	if (timevalisset(&p->p_realtimer.it_value) &&
 	    callout_stop(&p->p_itcallout) == 0) {
 		timevalclear(&p->p_realtimer.it_interval);
 		msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0);
 		KASSERT(!timevalisset(&p->p_realtimer.it_value),
 		    ("realtime timer is still armed"));
 	}
 	PROC_UNLOCK(p);
 
 	/*
 	 * Reset any sigio structures pointing to us as a result of
 	 * F_SETOWN with our pid.
 	 */
 	funsetownlst(&p->p_sigiolst);
 
 	/*
 	 * If this process has an nlminfo data area (for lockd), release it
 	 */
 	if (nlminfo_release_p != NULL && p->p_nlminfo != NULL)
 		(*nlminfo_release_p)(p);
 
 	/*
 	 * Close open files and release open-file table.
 	 * This may block!
 	 */
 	fdescfree(td);
 
 	/*
 	 * If this thread tickled GEOM, we need to wait for the giggling to
 	 * stop before we return to userland
 	 */
 	if (td->td_pflags & TDP_GEOM)
 		g_waitidle();
 
 	/*
 	 * Remove ourself from our leader's peer list and wake our leader.
 	 */
 	if (p->p_leader->p_peers != NULL) {
 		mtx_lock(&ppeers_lock);
 		/* Re-test under the lock; the first test was unlocked. */
 		if (p->p_leader->p_peers != NULL) {
 			q = p->p_leader;
 			while (q->p_peers != p)
 				q = q->p_peers;
 			q->p_peers = p->p_peers;
 			wakeup(p->p_leader);
 		}
 		mtx_unlock(&ppeers_lock);
 	}
 
 	vmspace_exit(td);
 
 	sx_xlock(&proctree_lock);
 	if (SESS_LEADER(p)) {
 		struct session *sp = p->p_session;
 		struct tty *tp;
 
 		/*
 		 * s_ttyp is not zero'd; we use this to indicate that
 		 * the session once had a controlling terminal. (for
 		 * logging and informational purposes)
 		 */
 		SESS_LOCK(sp);
 		ttyvp = sp->s_ttyvp;
 		tp = sp->s_ttyp;
 		sp->s_ttyvp = NULL;
 		sp->s_ttydp = NULL;
 		sp->s_leader = NULL;
 		SESS_UNLOCK(sp);
 
 		/*
 		 * Signal foreground pgrp and revoke access to
 		 * controlling terminal if it has not been revoked
 		 * already.
 		 *
 		 * Because the TTY may have been revoked in the mean
 		 * time and could already have a new session associated
 		 * with it, make sure we don't send a SIGHUP to a
 		 * foreground process group that does not belong to this
 		 * session.
 		 */
 
 		if (tp != NULL) {
 			tty_lock(tp);
 			if (tp->t_session == sp)
 				tty_signal_pgrp(tp, SIGHUP);
 			tty_unlock(tp);
 		}
 
 		if (ttyvp != NULL) {
 			/* proctree_lock is dropped around the vnode lock. */
 			sx_xunlock(&proctree_lock);
 			if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
 				VOP_REVOKE(ttyvp, REVOKEALL);
 				VOP_UNLOCK(ttyvp, 0);
 			}
 			sx_xlock(&proctree_lock);
 		}
 	}
 	fixjobc(p, p->p_pgrp, 0);
 	sx_xunlock(&proctree_lock);
 	(void)acct_process(td);
 
 	/* Release the TTY now we've unlocked everything. */
 	if (ttyvp != NULL)
 		vrele(ttyvp);
 #ifdef KTRACE
 	ktrprocexit(td);
 #endif
 	/*
 	 * Release reference to text vnode
 	 */
 	if (p->p_textvp != NULL) {
 		vrele(p->p_textvp);
 		p->p_textvp = NULL;
 	}
 
 	/*
 	 * Release our limits structure.
 	 */
 	lim_free(p->p_limit);
 	p->p_limit = NULL;
 
 	tidhash_remove(td);
 
 	/*
 	 * Remove proc from allproc queue and pidhash chain.
 	 * Place onto zombproc.  Unlink from parent's child list.
 	 */
 	sx_xlock(&allproc_lock);
 	LIST_REMOVE(p, p_list);
 	LIST_INSERT_HEAD(&zombproc, p, p_list);
 	LIST_REMOVE(p, p_hash);
 	sx_xunlock(&allproc_lock);
 
 	/*
 	 * Call machine-dependent code to release any
 	 * machine-dependent resources other than the address space.
 	 * The address space is released by "vmspace_exitfree(p)" in
 	 * vm_waitproc().
 	 */
 	cpu_exit(td);
 
 	WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);
 
 	/*
 	 * Reparent all children processes:
 	 * - traced ones to the original parent (or init if we are that parent)
 	 * - the rest to init
 	 */
 	sx_xlock(&proctree_lock);
 	q = LIST_FIRST(&p->p_children);
 	if (q != NULL)		/* only need this if any child is S_ZOMB */
 		wakeup(q->p_reaper);
 	/* nq is fetched first because proc_reparent() relinks q. */
 	for (; q != NULL; q = nq) {
 		nq = LIST_NEXT(q, p_sibling);
 		PROC_LOCK(q);
 		q->p_sigparent = SIGCHLD;
 
 		if (!(q->p_flag & P_TRACED)) {
 			proc_reparent(q, q->p_reaper);
 		} else {
 			/*
 			 * Traced processes are killed since their existence
 			 * means someone is screwing up.
 			 */
 			t = proc_realparent(q);
 			if (t == p) {
 				proc_reparent(q, q->p_reaper);
 			} else {
 				PROC_LOCK(t);
 				proc_reparent(q, t);
 				PROC_UNLOCK(t);
 			}
 			/*
 			 * Since q was found on our children list, the
 			 * proc_reparent() call moved q to the orphan
 			 * list due to present P_TRACED flag. Clear
 			 * orphan link for q now while q is locked.
 			 */
 			clear_orphan(q);
 			q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE);
 			FOREACH_THREAD_IN_PROC(q, tdt)
 				tdt->td_dbgflags &= ~TDB_SUSPEND;
 			kern_psignal(q, SIGKILL);
 		}
 		PROC_UNLOCK(q);
 	}
 
 	/*
 	 * Also get rid of our orphans.
 	 */
 	while ((q = LIST_FIRST(&p->p_orphans)) != NULL) {
 		PROC_LOCK(q);
 		CTR2(KTR_PTRACE, "exit: pid %d, clearing orphan %d", p->p_pid,
 		    q->p_pid);
 		clear_orphan(q);
 		PROC_UNLOCK(q);
 	}
 
 	/* Save exit status. */
 	PROC_LOCK(p);
 	p->p_xthread = td;
 
 	/* Tell the prison that we are gone. */
 	prison_proc_free(p->p_ucred->cr_prison);
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the exit if it
 	 * has declared an interest.
 	 */
 	if (dtrace_fasttrap_exit)
 		dtrace_fasttrap_exit(p);
 #endif
 
 	/*
 	 * Notify interested parties of our demise.
 	 */
 	KNOTE_LOCKED(&p->p_klist, NOTE_EXIT);
 
 #ifdef KDTRACE_HOOKS
 	/* Classify the exit as a CLD_* code for the "exit" SDT probe. */
 	int reason = CLD_EXITED;
 	if (WCOREDUMP(signo))
 		reason = CLD_DUMPED;
 	else if (WIFSIGNALED(signo))
 		reason = CLD_KILLED;
-	SDT_PROBE1(proc, kernel, , exit, reason);
+	SDT_PROBE1(proc, , , exit, reason);
 #endif
 
 	/*
 	 * Just delete all entries in the p_klist. At this point we won't
 	 * report any more events, and there are nasty race conditions that
 	 * can beat us if we don't.
 	 */
 	knlist_clear(&p->p_klist, 1);
 
 	/*
 	 * If this is a process with a descriptor, we may not need to deliver
 	 * a signal to the parent.  proctree_lock is held over
 	 * procdesc_exit() to serialize concurrent calls to close() and
 	 * exit().
 	 */
 	if (p->p_procdesc == NULL || procdesc_exit(p)) {
 		/*
 		 * Notify parent that we're gone.  If parent has the
 		 * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN,
 		 * notify process 1 instead (and hope it will handle this
 		 * situation).
 		 */
 		PROC_LOCK(p->p_pptr);
 		mtx_lock(&p->p_pptr->p_sigacts->ps_mtx);
 		if (p->p_pptr->p_sigacts->ps_flag &
 		    (PS_NOCLDWAIT | PS_CLDSIGIGN)) {
 			struct proc *pp;
 
 			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 			pp = p->p_pptr;
 			PROC_UNLOCK(pp);
 			proc_reparent(p, p->p_reaper);
 			p->p_sigparent = SIGCHLD;
 			PROC_LOCK(p->p_pptr);
 
 			/*
 			 * Notify parent, so in case he was wait(2)ing or
 			 * executing waitpid(2) with our pid, he will
 			 * continue.
 			 */
 			wakeup(pp);
 		} else
 			mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
 
 		if (p->p_pptr == p->p_reaper || p->p_pptr == initproc)
 			childproc_exited(p);
 		else if (p->p_sigparent != 0) {
 			if (p->p_sigparent == SIGCHLD)
 				childproc_exited(p);
 			else	/* LINUX thread */
 				kern_psignal(p->p_pptr, p->p_sigparent);
 		}
 	} else
 		PROC_LOCK(p->p_pptr);
 	/* Both branches above leave the parent's proc lock held. */
 	sx_xunlock(&proctree_lock);
 
 	/*
 	 * The state PRS_ZOMBIE prevents other processes from sending
 	 * signal to the process, to avoid memory leak, we free memory
 	 * for signal queue at the time when the state is set.
 	 */
 	sigqueue_flush(&p->p_sigqueue);
 	sigqueue_flush(&td->td_sigqueue);
 
 	/*
 	 * We have to wait until after acquiring all locks before
 	 * changing p_state.  We need to avoid all possible context
 	 * switches (including ones from blocking on a mutex) while
 	 * marked as a zombie.  We also have to set the zombie state
 	 * before we release the parent process' proc lock to avoid
 	 * a lost wakeup.  So, we first call wakeup, then we grab the
 	 * sched lock, update the state, and release the parent process'
 	 * proc lock.
 	 */
 	wakeup(p->p_pptr);
 	cv_broadcast(&p->p_pwait);
 	sched_exit(p->p_pptr, td);
 	umtx_thread_exit(td);
 	PROC_SLOCK(p);
 	p->p_state = PRS_ZOMBIE;
 	PROC_UNLOCK(p->p_pptr);
 
 	/*
 	 * Hopefully no one will try to deliver a signal to the process this
 	 * late in the game.
 	 */
 	knlist_destroy(&p->p_klist);
 
 	/*
 	 * Save our children's rusage information in our exit rusage.
 	 */
 	PROC_STATLOCK(p);
 	ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
 	PROC_STATUNLOCK(p);
 
 	/*
 	 * Make sure the scheduler takes this thread out of its tables etc.
 	 * This will also release this thread's reference to the ucred.
 	 * Other thread parts to release include pcb bits and such.
 	 */
 	thread_exit();
 }
 
 
 #ifndef _SYS_SYSPROTO_H_
 struct abort2_args {
 	char *why;
 	int nargs;
 	void **args;
 };
 #endif
 
 /*
  * abort2() system call: log a caller-supplied reason and up to 16 pointer
  * values, then terminate the process via exit1().  The process is killed
  * with SIGABRT when all arguments could be fetched, and with SIGKILL (plus
  * a note in the log line) when they were invalid or inaccessible.
  */
 int
 sys_abort2(struct thread *td, struct abort2_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct sbuf *msg;
 	void *ptrs[16];
 	int idx, sig;
 
 	/*
 	 * Start the log line up front so that both a proper abort2() call
 	 * and one with bad arguments can be reported.  512 bytes hold the
 	 * descriptions of 16 arguments plus the additional text.
 	 */
 	msg = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN);
 	sbuf_clear(msg);
 	sbuf_printf(msg, "%s(pid %d uid %d) aborted: ",
 	    p->p_comm, p->p_pid, td->td_ucred->cr_uid);
 
 	/*
 	 * abort2() cannot return, so an improper call ends in SIGKILL;
 	 * this stays in effect until every check below has passed.
 	 */
 	sig = SIGKILL;
 
 	/* Bound the argument count to keep user space from DoSing us. */
 	if (uap->nargs < 0 || uap->nargs > 16)
 		goto out;
 	if (uap->nargs > 0 && (uap->args == NULL ||
 	    copyin(uap->args, ptrs, uap->nargs * sizeof(void *)) != 0))
 		goto out;
 
 	/*
 	 * The reason string is capped at 128 bytes, which still fits when
 	 * the maximal number of arguments is logged as well.
 	 */
 	if (uap->why == NULL)
 		sbuf_printf(msg, "(null)");
 	else if (sbuf_copyin(msg, uap->why, 128) < 0)
 		goto out;
 
 	if (uap->nargs > 0) {
 		sbuf_printf(msg, "(");
 		for (idx = 0; idx < uap->nargs; idx++)
 			sbuf_printf(msg, "%s%p", idx == 0 ? "" : ", ",
 			    ptrs[idx]);
 		sbuf_printf(msg, ")");
 	}
 
 	/* Everything was valid and could be copied in: abort properly. */
 	sig = SIGABRT;
 out:
 	if (sig == SIGKILL) {
 		sbuf_trim(msg);
 		sbuf_printf(msg, " (Reason text inaccessible)");
 	}
 	sbuf_cat(msg, "\n");
 	sbuf_finish(msg);
 	log(LOG_INFO, "%s", sbuf_data(msg));
 	sbuf_delete(msg);
 	exit1(td, 0, sig);
 	return (0);
 }
 
 
 #ifdef COMPAT_43
 /*
  * Old wait() entry point: wait for any child through kern_wait() and, on
  * success, hand the status back in the second return value slot.
  */
 int
 owait(struct thread *td, struct owait_args *uap __unused)
 {
 	int status;
 	int error;
 
 	error = kern_wait(td, WAIT_ANY, &status, 0, NULL);
 	if (error != 0)
 		return (error);
 	td->td_retval[1] = status;
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 /*
  * The dirty work is handled by kern_wait().
  */
 int
 sys_wait4(struct thread *td, struct wait4_args *uap)
 {
 	struct rusage ru, *rup;
 	int error, status;
 
 	if (uap->rusage != NULL)
 		rup = &ru;
 	else
 		rup = NULL;
 	error = kern_wait(td, uap->pid, &status, uap->options, rup);
 	if (uap->status != NULL && error == 0)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->rusage != NULL && error == 0)
 		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
 	return (error);
 }
 
 int
 sys_wait6(struct thread *td, struct wait6_args *uap)
 {
 	struct __wrusage wru, *wrup;
 	siginfo_t si, *sip;
 	idtype_t idtype;
 	id_t id;
 	int error, status;
 
 	idtype = uap->idtype;
 	id = uap->id;
 
 	if (uap->wrusage != NULL)
 		wrup = &wru;
 	else
 		wrup = NULL;
 
 	if (uap->info != NULL) {
 		sip = &si;
 		bzero(sip, sizeof(*sip));
 	} else
 		sip = NULL;
 
 	/*
 	 *  We expect all callers of wait6() to know about WEXITED and
 	 *  WTRAPPED.
 	 */
 	error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip);
 
 	if (uap->status != NULL && error == 0)
 		error = copyout(&status, uap->status, sizeof(status));
 	if (uap->wrusage != NULL && error == 0)
 		error = copyout(&wru, uap->wrusage, sizeof(wru));
 	if (uap->info != NULL && error == 0)
 		error = copyout(&si, uap->info, sizeof(si));
 	return (error);
 }
 
 /*
  * Reap the remains of a zombie process and optionally return status and
  * rusage.  Asserts and will release both the proctree_lock and the process
  * lock as part of its work.
  *
  * td is the waiting thread and p the zombie.  On entry proctree_lock must
  * be exclusively held, and both the proc lock and the proc spin lock of p
  * must be owned.  If status is not NULL it receives the exit code built
  * from p_xexit and p_xsig.  There are three ways out, each of which
  * returns with proctree_lock and the proc lock of p released:
  *  - WNOWAIT in options: only the status is reported, p is left alone;
  *  - p was obtained through a ptrace attach: p is handed back to its real
  *    parent, which is signalled and woken, and p is not freed;
  *  - otherwise p is unlinked, its resources are released and the proc
  *    structure is returned to proc_zone.
  */
 void
 proc_reap(struct thread *td, struct proc *p, int *status, int options)
 {
 	struct proc *q, *t;
 
 	sx_assert(&proctree_lock, SA_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE"));
 
 	/* q is the process doing the wait. */
 	q = td->td_proc;
 
 	PROC_SUNLOCK(p);
 	if (status)
 		*status = KW_EXITCODE(p->p_xexit, p->p_xsig);
 	if (options & WNOWAIT) {
 		/*
 		 *  Only poll, returning the status.  Caller does not wish to
 		 * release the proc struct just yet.
 		 */
 		PROC_UNLOCK(p);
 		sx_xunlock(&proctree_lock);
 		return;
 	}
 
 	PROC_LOCK(q);
 	sigqueue_take(p->p_ksi);
 	PROC_UNLOCK(q);
 
 	/*
 	 * If we got the child via a ptrace 'attach', we need to give it back
 	 * to the old parent.
 	 */
 	if (p->p_oppid != 0 && p->p_oppid != p->p_pptr->p_pid) {
 		/* Drop p's lock so that t can be locked before p. */
 		PROC_UNLOCK(p);
 		t = proc_realparent(p);
 		PROC_LOCK(t);
 		PROC_LOCK(p);
 		CTR2(KTR_PTRACE,
 		    "wait: traced child %d moved back to parent %d", p->p_pid,
 		    t->p_pid);
 		proc_reparent(p, t);
 		p->p_oppid = 0;
 		PROC_UNLOCK(p);
 		pksignal(t, SIGCHLD, p->p_ksi);
 		wakeup(t);
 		cv_broadcast(&p->p_pwait);
 		PROC_UNLOCK(t);
 		sx_xunlock(&proctree_lock);
 		return;
 	}
 	p->p_oppid = 0;
 	PROC_UNLOCK(p);
 
 	/*
 	 * Remove other references to this process to ensure we have an
 	 * exclusive reference.
 	 */
 	sx_xlock(&allproc_lock);
 	LIST_REMOVE(p, p_list);	/* off zombproc */
 	sx_xunlock(&allproc_lock);
 	LIST_REMOVE(p, p_sibling);
 	reaper_abandon_children(p, true);
 	LIST_REMOVE(p, p_reapsibling);
 	PROC_LOCK(p);
 	clear_orphan(p);
 	PROC_UNLOCK(p);
 	leavepgrp(p);
 	if (p->p_procdesc != NULL)
 		procdesc_reap(p);
 	sx_xunlock(&proctree_lock);
 
 	/*
 	 * Removal from allproc list and process group list paired with
 	 * PROC_LOCK which was executed during that time should guarantee
 	 * nothing can reach this process anymore. As such further locking
 	 * is unnecessary.
 	 */
 	p->p_xexit = p->p_xsig = 0;		/* XXX: why? */
 
 	/* Fold the zombie's resource usage into the waiter's child totals. */
 	PROC_LOCK(q);
 	ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux);
 	PROC_UNLOCK(q);
 
 	/*
 	 * Decrement the count of procs running with this uid.
 	 */
 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
 
 	/*
 	 * Destroy resource accounting information associated with the process.
 	 */
 #ifdef RACCT
 	if (racct_enable) {
 		PROC_LOCK(p);
 		racct_sub(p, RACCT_NPROC, 1);
 		PROC_UNLOCK(p);
 	}
 #endif
 	racct_proc_exit(p);
 
 	/*
 	 * Free credentials, arguments, and sigacts.
 	 */
 	crfree(p->p_ucred);
 	proc_set_cred(p, NULL);
 	pargs_drop(p->p_args);
 	p->p_args = NULL;
 	sigacts_free(p->p_sigacts);
 	p->p_sigacts = NULL;
 
 	/*
 	 * Do any thread-system specific cleanups.
 	 */
 	thread_wait(p);
 
 	/*
 	 * Give vm and machine-dependent layer a chance to free anything that
 	 * cpu_exit couldn't release while still running in process context.
 	 */
 	vm_waitproc(p);
 #ifdef MAC
 	mac_proc_destroy(p);
 #endif
 	/*
 	 * Free any domain policy that's still hiding around.
 	 */
 	vm_domain_policy_cleanup(&p->p_vm_dom_policy);
 
 	KASSERT(FIRST_THREAD_IN_PROC(p),
 	    ("proc_reap: no residual thread!"));
 	uma_zfree(proc_zone, p);
 	atomic_add_int(&nprocs, -1);
 }
 
 /*
  * Decide whether process p matches the wait request described by idtype,
  * id and options, fill in the optional siginfo and wrusage results, and
  * reap p if it is a zombie and check_only is not set.
  *
  * The caller must hold proctree_lock exclusively.  Return values:
  *   0  p does not match, or is not eligible; nothing was handed to
  *      proc_reap().
  *   1  p matches but was not reaped; the proc lock taken here has been
  *      dropped again and proctree_lock is still held.
  *  -1  p was a zombie and has been passed to proc_reap(), which releases
  *      both the proc lock and proctree_lock before returning.
  */
 static int
 proc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id,
     int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo,
     int check_only)
 {
 	struct rusage *rup;
 
 	sx_assert(&proctree_lock, SA_XLOCKED);
 
 	PROC_LOCK(p);
 
 	/* Filter by the requested id type; any mismatch returns 0. */
 	switch (idtype) {
 	case P_ALL:
 		/* A process with a process descriptor is skipped here. */
 		if (p->p_procdesc != NULL) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	case P_PID:
 		if (p->p_pid != (pid_t)id) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	case P_PGID:
 		if (p->p_pgid != (pid_t)id) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	case P_SID:
 		if (p->p_session->s_sid != (pid_t)id) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	case P_UID:
 		if (p->p_ucred->cr_uid != (uid_t)id) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	case P_GID:
 		if (p->p_ucred->cr_gid != (gid_t)id) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	case P_JAILID:
 		if (p->p_ucred->cr_prison->pr_id != (int)id) {
 			PROC_UNLOCK(p);
 			return (0);
 		}
 		break;
 	/*
 	 * It seems that the thread structures get zeroed out
 	 * at process exit.  This makes it impossible to
 	 * support P_SETID, P_CID or P_CPUID.
 	 */
 	default:
 		PROC_UNLOCK(p);
 		return (0);
 	}
 
 	/* A non-zero result from p_canwait() makes p ineligible. */
 	if (p_canwait(td, p)) {
 		PROC_UNLOCK(p);
 		return (0);
 	}
 
 	/* Zombies are only of interest when WEXITED was requested. */
 	if (((options & WEXITED) == 0) && (p->p_state == PRS_ZOMBIE)) {
 		PROC_UNLOCK(p);
 		return (0);
 	}
 
 	/*
 	 * This special case handles a kthread spawned by linux_clone
 	 * (see linux_misc.c).  The linux_wait4 and linux_waitpid
 	 * functions need to be able to distinguish between waiting
 	 * on a process and waiting on a thread.  It is a thread if
 	 * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
 	 * signifies we want to wait for threads and not processes.
 	 */
 	if ((p->p_sigparent != SIGCHLD) ^
 	    ((options & WLINUXCLONE) != 0)) {
 		PROC_UNLOCK(p);
 		return (0);
 	}
 
 	if (siginfo != NULL) {
 		bzero(siginfo, sizeof(*siginfo));
 		siginfo->si_errno = 0;
 
 		/*
 		 * SUSv4 requires that the si_signo value is always
 		 * SIGCHLD. Obey it despite the rfork(2) interface
 		 * allows to request other signal for child exit
 		 * notification.
 		 */
 		siginfo->si_signo = SIGCHLD;
 
 		/*
 		 *  This is still a rough estimate.  We will fix the
 		 *  cases TRAPPED, STOPPED, and CONTINUED later.
 		 */
 		if (WCOREDUMP(p->p_xsig)) {
 			siginfo->si_code = CLD_DUMPED;
 			siginfo->si_status = WTERMSIG(p->p_xsig);
 		} else if (WIFSIGNALED(p->p_xsig)) {
 			siginfo->si_code = CLD_KILLED;
 			siginfo->si_status = WTERMSIG(p->p_xsig);
 		} else {
 			siginfo->si_code = CLD_EXITED;
 			siginfo->si_status = p->p_xexit;
 		}
 
 		siginfo->si_pid = p->p_pid;
 		siginfo->si_uid = p->p_ucred->cr_uid;
 
 		/*
 		 * The si_addr field would be useful additional
 		 * detail, but apparently the PC value may be lost
 		 * when we reach this point.  bzero() above sets
 		 * siginfo->si_addr to NULL.
 		 */
 	}
 
 	/*
 	 * There should be no reason to limit resources usage info to
 	 * exited processes only.  A snapshot about any resources used
 	 * by a stopped process may be exactly what is needed.
 	 */
 	if (wrusage != NULL) {
 		rup = &wrusage->wru_self;
 		*rup = p->p_ru;
 		PROC_STATLOCK(p);
 		calcru(p, &rup->ru_utime, &rup->ru_stime);
 		PROC_STATUNLOCK(p);
 
 		rup = &wrusage->wru_children;
 		*rup = p->p_stats->p_cru;
 		calccru(p, &rup->ru_utime, &rup->ru_stime);
 	}
 
 	if (p->p_state == PRS_ZOMBIE && !check_only) {
 		/* proc_reap() expects the proc spin lock to be held as well. */
 		PROC_SLOCK(p);
 		proc_reap(td, p, status, options);
 		return (-1);
 	}
 	PROC_UNLOCK(p);
 	return (1);
 }
 
 /*
  * wait4()-style front end to kern_wait6(): converts the classic pid
  * argument into an (idtype, id) pair, forces WEXITED and WTRAPPED on, and
  * reports only the waited-for process' own resource usage.
  */
 int
 kern_wait(struct thread *td, pid_t pid, int *status, int options,
     struct rusage *rusage)
 {
 	struct __wrusage wru;
 	idtype_t idtype;
 	id_t id;
 	int error;
 
 	/*
 	 * Map the special pid values onto what kern_wait6() expects.
 	 * WAIT_MYPGRP needs no treatment here; kern_wait6() resolves
 	 * it on its own.
 	 */
 	idtype = P_PID;
 	id = (id_t)pid;
 	if (pid == WAIT_ANY) {
 		idtype = P_ALL;
 		id = 0;
 	} else if (pid < 0) {
 		idtype = P_PGID;
 		id = (id_t)-pid;
 	}
 
 	/*
 	 * WEXITED and WTRAPPED are added implicitly for backward
 	 * compatibility.
 	 */
 	error = kern_wait6(td, idtype, id, status,
 	    options | WEXITED | WTRAPPED,
 	    rusage != NULL ? &wru : NULL, NULL);
 	if (rusage != NULL)
 		*rusage = wru.wru_self;
 	return (error);
 }
 
 /*
  * Wait for a state change in one of the calling process's children.
  *
  * td        - calling thread; the children and orphans of td->td_proc
  *             are the candidates.
  * idtype/id - selector, handed to proc_to_reap() for every candidate.
  * status    - if non-NULL, receives the wait status of a stopped or
  *             continued child (it is also passed to proc_to_reap()).
  * options   - W* flags.  Unknown bits, or a request that names none of
  *             WEXITED, WUNTRACED, WCONTINUED and WTRAPPED, yield EINVAL.
  * wrusage   - passed through to proc_to_reap().
  * siginfo   - if non-NULL, si_status/si_code are filled in here for
  *             stopped or continued children (also passed to
  *             proc_to_reap()).
  *
  * Returns 0 with td->td_retval[0] set to the pid of the reported child,
  * or to 0 when WNOHANG is set and nothing is ready.  Returns ECHILD
  * when no child or orphan matches, EINVAL for bad options, or the error
  * returned by msleep().
  */
 int
 kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status,
     int options, struct __wrusage *wrusage, siginfo_t *siginfo)
 {
 	struct proc *p, *q;
 	pid_t pid;
 	int error, nfound, ret;
 
 	AUDIT_ARG_VALUE((int)idtype);	/* XXX - This is likely wrong! */
 	AUDIT_ARG_PID((pid_t)id);	/* XXX - This may be wrong! */
 	AUDIT_ARG_VALUE(options);
 
 	q = td->td_proc;
 
 	/*
 	 * WAIT_MYPGRP stands for the caller's own process group: rewrite
 	 * the request as (P_PGID, q->p_pgid).
 	 */
 	if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) {
 		PROC_LOCK(q);
 		id = (id_t)q->p_pgid;
 		PROC_UNLOCK(q);
 		idtype = P_PGID;
 	}
 
 	/* If we don't know the option, just return. */
 	if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT |
 	    WEXITED | WTRAPPED | WLINUXCLONE)) != 0)
 		return (EINVAL);
 	if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) {
 		/*
 		 * We will be unable to find any matching processes,
 		 * because there are no known events to look for.
 		 * Prefer to return error instead of blocking
 		 * indefinitely.
 		 */
 		return (EINVAL);
 	}
 
 	/*
 	 * Every pass clears P_STATCHILD before scanning.  If the flag is
 	 * found set again once the scan is over (see the end of the
 	 * function), the scan is repeated instead of sleeping.
 	 */
 loop:
 	if (q->p_flag & P_STATCHILD) {
 		PROC_LOCK(q);
 		q->p_flag &= ~P_STATCHILD;
 		PROC_UNLOCK(q);
 	}
 	nfound = 0;
 	sx_xlock(&proctree_lock);
 	LIST_FOREACH(p, &q->p_children, p_sibling) {
 		/*
 		 * proc_to_reap() result: 0 - p is not a candidate; 1 - p
 		 * is a candidate and is examined further below; anything
 		 * else - the wait has been completed for p, report its
 		 * pid.
 		 * NOTE(review): the pid is sampled before the call and
 		 * the last case returns without unlocking proctree_lock
 		 * here, so proc_to_reap() presumably releases p and the
 		 * lock itself - confirm against proc_to_reap().
 		 */
 		pid = p->p_pid;
 		ret = proc_to_reap(td, p, idtype, id, status, options,
 		    wrusage, siginfo, 0);
 		if (ret == 0)
 			continue;
 		else if (ret == 1)
 			nfound++;
 		else {
 			td->td_retval[0] = pid;
 			return (0);
 		}
 
 		PROC_LOCK(p);
 		PROC_SLOCK(p);
 
 		/*
 		 * WTRAPPED: a traced child that is stopped (p_suspcount
 		 * equals p_numthreads) and has not been reported yet
 		 * (P_WAITED clear).  P_WAITED is left untouched and the
 		 * queued p_ksi is kept when WNOWAIT is given.
 		 */
 		if ((options & WTRAPPED) != 0 &&
 		    (p->p_flag & P_TRACED) != 0 &&
 		    (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) != 0 &&
 		    (p->p_suspcount == p->p_numthreads) &&
 		    ((p->p_flag & P_WAITED) == 0)) {
 			PROC_SUNLOCK(p);
 			if ((options & WNOWAIT) == 0)
 				p->p_flag |= P_WAITED;
 			sx_xunlock(&proctree_lock);
 
 			if (status != NULL)
 				*status = W_STOPCODE(p->p_xsig);
 			if (siginfo != NULL) {
 				siginfo->si_status = p->p_xsig;
 				siginfo->si_code = CLD_TRAPPED;
 			}
 			if ((options & WNOWAIT) == 0) {
 				PROC_LOCK(q);
 				sigqueue_take(p->p_ksi);
 				PROC_UNLOCK(q);
 			}
 
 			CTR4(KTR_PTRACE,
 	    "wait: returning trapped pid %d status %#x (xstat %d) xthread %d",
 			    p->p_pid, W_STOPCODE(p->p_xsig), p->p_xsig,
 			    p->p_xthread != NULL ? p->p_xthread->td_tid : -1);
 			PROC_UNLOCK(p);
 			td->td_retval[0] = pid;
 			return (0);
 		}
 		/*
 		 * WUNTRACED: a child stopped by a signal, handled like the
 		 * trapped case above but reported as CLD_STOPPED.
 		 */
 		if ((options & WUNTRACED) != 0 &&
 		    (p->p_flag & P_STOPPED_SIG) != 0 &&
 		    (p->p_suspcount == p->p_numthreads) &&
 		    ((p->p_flag & P_WAITED) == 0)) {
 			PROC_SUNLOCK(p);
 			if ((options & WNOWAIT) == 0)
 				p->p_flag |= P_WAITED;
 			sx_xunlock(&proctree_lock);
 
 			if (status != NULL)
 				*status = W_STOPCODE(p->p_xsig);
 			if (siginfo != NULL) {
 				siginfo->si_status = p->p_xsig;
 				siginfo->si_code = CLD_STOPPED;
 			}
 			if ((options & WNOWAIT) == 0) {
 				PROC_LOCK(q);
 				sigqueue_take(p->p_ksi);
 				PROC_UNLOCK(q);
 			}
 
 			PROC_UNLOCK(p);
 			td->td_retval[0] = pid;
 			return (0);
 		}
 		PROC_SUNLOCK(p);
 		/*
 		 * WCONTINUED: a child flagged P_CONTINUED.  The flag is
 		 * consumed unless WNOWAIT is given.
 		 */
 		if ((options & WCONTINUED) != 0 &&
 		    (p->p_flag & P_CONTINUED) != 0) {
 			sx_xunlock(&proctree_lock);
 			if ((options & WNOWAIT) == 0) {
 				p->p_flag &= ~P_CONTINUED;
 				PROC_LOCK(q);
 				sigqueue_take(p->p_ksi);
 				PROC_UNLOCK(q);
 			}
 			PROC_UNLOCK(p);
 
 			if (status != NULL)
 				*status = SIGCONT;
 			if (siginfo != NULL) {
 				siginfo->si_status = SIGCONT;
 				siginfo->si_code = CLD_CONTINUED;
 			}
 			td->td_retval[0] = pid;
 			return (0);
 		}
 		PROC_UNLOCK(p);
 	}
 
 	/*
 	 * Look in the orphans list too, to allow the parent to
 	 * collect its child's exit status even if the child is being
 	 * debugged.
 	 *
 	 * Debugger detaches from the parent upon successful
 	 * switch-over from parent to child.  At this point due to
 	 * re-parenting the parent loses the child to debugger and a
 	 * wait4(2) call would report that it has no children to wait
 	 * for.  By maintaining a list of orphans we allow the parent
 	 * to successfully wait until the child becomes a zombie.
 	 *
 	 * Orphans are only counted here so that the caller keeps
 	 * waiting; no status, rusage or siginfo is collected for them
 	 * (NULL is passed for all three).
 	 */
 	if (nfound == 0) {
 		LIST_FOREACH(p, &q->p_orphans, p_orphan) {
 			ret = proc_to_reap(td, p, idtype, id, NULL, options,
 			    NULL, NULL, 1);
 			if (ret != 0) {
 				KASSERT(ret != -1, ("reaped an orphan (pid %d)",
 				    (int)td->td_retval[0]));
 				nfound++;
 				break;
 			}
 		}
 	}
 	/* Neither a child nor an orphan matched the request. */
 	if (nfound == 0) {
 		sx_xunlock(&proctree_lock);
 		return (ECHILD);
 	}
 	/* Candidates exist but none is ready: WNOHANG reports pid 0. */
 	if (options & WNOHANG) {
 		sx_xunlock(&proctree_lock);
 		td->td_retval[0] = 0;
 		return (0);
 	}
 	/*
 	 * Sleep on q until woken up, unless P_STATCHILD has been set
 	 * since this pass started, in which case rescan right away.
 	 */
 	PROC_LOCK(q);
 	sx_xunlock(&proctree_lock);
 	if (q->p_flag & P_STATCHILD) {
 		q->p_flag &= ~P_STATCHILD;
 		error = 0;
 	} else
 		error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0);
 	PROC_UNLOCK(q);
 	if (error)
 		return (error);
 	goto loop;
 }
 
 /*
  * Make process 'parent' the new parent of process 'child'.
  * Must be called with an exclusive hold of proctree lock and with the
  * child's process lock held (both are asserted).  Nothing is done when
  * 'parent' already is the child's parent.
  *
  * A traced child is additionally recorded on the orphan list of the
  * parent it is taken away from.
  */
 void
 proc_reparent(struct proc *child, struct proc *parent)
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(child, MA_OWNED);
 	if (child->p_pptr == parent)
 		return;
 
 	/* Take the child's p_ksi back while holding the old parent's lock. */
 	PROC_LOCK(child->p_pptr);
 	sigqueue_take(child->p_ksi);
 	PROC_UNLOCK(child->p_pptr);
 	/* Move the child from its current sibling list to the new parent's. */
 	LIST_REMOVE(child, p_sibling);
 	LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
 
 	clear_orphan(child);
 	if (child->p_flag & P_TRACED) {
 		/*
 		 * child->p_pptr still names the old parent here.  Only
 		 * the first entry ever put on an empty orphan list is
 		 * tagged P_TREE_FIRST_ORPHAN; later entries are inserted
 		 * right behind the list head.
 		 */
 		if (LIST_EMPTY(&child->p_pptr->p_orphans)) {
 			child->p_treeflag |= P_TREE_FIRST_ORPHAN;
 			LIST_INSERT_HEAD(&child->p_pptr->p_orphans, child,
 			    p_orphan);
 		} else {
 			LIST_INSERT_AFTER(LIST_FIRST(&child->p_pptr->p_orphans),
 			    child, p_orphan);
 		}
 		child->p_treeflag |= P_TREE_ORPHANED;
 	}
 
 	/* Must come last: the code above relies on the old p_pptr. */
 	child->p_pptr = parent;
 }
Index: head/sys/kern/kern_fork.c
===================================================================
--- head/sys/kern/kern_fork.c	(revision 292383)
+++ head/sys/kern/kern_fork.c	(revision 292384)
@@ -1,1090 +1,1089 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_fork.c	8.6 (Berkeley) 4/8/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 #include "opt_kstack_pages.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/sysctl.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/procdesc.h>
 #include <sys/pioctl.h>
 #include <sys/ptrace.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/syscall.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/acct.h>
 #include <sys/ktr.h>
 #include <sys/ktrace.h>
 #include <sys/unistd.h>	
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/sysent.h>
 #include <sys/signalvar.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/vm_domain.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 dtrace_fork_func_t	dtrace_fasttrap_fork;
 #endif
 
 /*
  * SDT probe "create" of the "proc" provider.  It takes three arguments
  * (struct proc *, struct proc *, int); do_fork() fires it with the new
  * process, the forking process and the fork flags.
  */
 SDT_PROVIDER_DECLARE(proc);
-SDT_PROBE_DEFINE3(proc, kernel, , create, "struct proc *",
-    "struct proc *", "int");
+SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int");
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Argument structure of sys_fork(), defined here only when
  * _SYS_SYSPROTO_H_ is not defined.  sys_fork() does not read it; the
  * single member is a placeholder.
  */
 struct fork_args {
 	int     dummy;
 };
 #endif
 
 /*
  * fork(2) system call entry point.
  *
  * Creates a child through fork1() with RFFDG | RFPROC.  On success the
  * child's pid is stored in td->td_retval[0] and td->td_retval[1] is
  * cleared; on failure the fork1() error is returned unchanged.  'uap'
  * is not used.
  */
 /* ARGSUSED */
 int
 sys_fork(struct thread *td, struct fork_args *uap)
 {
 	struct proc *child;
 	int rv;
 
 	rv = fork1(td, RFFDG | RFPROC, 0, &child, NULL, 0, NULL);
 	if (rv != 0)
 		return (rv);
 
 	td->td_retval[0] = child->p_pid;
 	td->td_retval[1] = 0;
 	return (0);
 }
 
 /*
  * pdfork(2) system call entry point.
  *
  * Creates a child through fork1() with RFFDG | RFPROC | RFPROCDESC and
  * uap->flags as the process descriptor flags.  On success the child's
  * pid is stored in td->td_retval[0], td->td_retval[1] is cleared and
  * the new process descriptor is copied out to uap->fdp; the result of
  * that copyout() is the return value.
  */
 /* ARGSUSED */
 int
 sys_pdfork(struct thread *td, struct pdfork_args *uap)
 {
 	struct proc *child;
 	int fd, rv;
 
 	/*
 	 * The descriptor has to be handed back by reference: 0 is a
 	 * valid file descriptor number, and the child must be able to
 	 * tell itself apart from the parent by the return value.
 	 */
 	rv = fork1(td, RFFDG | RFPROC | RFPROCDESC, 0, &child, &fd,
 	    uap->flags, NULL);
 	if (rv != 0)
 		return (rv);
 
 	td->td_retval[0] = child->p_pid;
 	td->td_retval[1] = 0;
 	return (copyout(&fd, uap->fdp, sizeof(fd)));
 }
 
 /*
  * vfork(2) system call entry point.
  *
  * Creates a child through fork1() with
  * RFFDG | RFPROC | RFPPWAIT | RFMEM.  On success the child's pid is
  * stored in td->td_retval[0] and td->td_retval[1] is cleared; on
  * failure the fork1() error is returned unchanged.  'uap' is not used.
  */
 /* ARGSUSED */
 int
 sys_vfork(struct thread *td, struct vfork_args *uap)
 {
 	struct proc *child;
 	int rv;
 
 	rv = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &child, NULL,
 	    0, NULL);
 	if (rv != 0)
 		return (rv);
 
 	td->td_retval[0] = child->p_pid;
 	td->td_retval[1] = 0;
 	return (0);
 }
 
 /*
  * rfork(2) system call entry point.
  *
  * Rejects any of the RFKERNELONLY flags with EINVAL, audits the flags
  * and hands them to fork1().  On success td->td_retval[0] is the pid
  * of the new process, or 0 when fork1() returned no process, and
  * td->td_retval[1] is cleared.
  */
 int
 sys_rfork(struct thread *td, struct rfork_args *uap)
 {
 	struct proc *child;
 	int rv;
 
 	/* Kernel-only flags may not be requested through the syscall. */
 	if ((uap->flags & RFKERNELONLY) != 0)
 		return (EINVAL);
 
 	AUDIT_ARG_FFLAGS(uap->flags);
 	rv = fork1(td, uap->flags, 0, &child, NULL, 0, NULL);
 	if (rv != 0)
 		return (rv);
 
 	if (child)
 		td->td_retval[0] = child->p_pid;
 	else
 		td->td_retval[0] = 0;
 	td->td_retval[1] = 0;
 	return (0);
 }
 
 /*
  * nprocs:  process counter.  fork1() increments it before allocating
  *          anything, compares the new value against maxproc and
  *          decrements it again on failure.
  * lastpid: pid most recently handed out by fork_findpid() (not updated
  *          for RFHIGHPID requests); exported read-only as kern.lastpid.
  */
 int	nprocs = 1;		/* process 0 */
 int	lastpid = 0;
 SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, 
     "Last used PID");
 
 /*
  * Random component to lastpid generation.  We mix in a random factor to make
  * it a little harder to predict.  We sanity check the modulus value to avoid
  * doing it in critical paths.  Don't let it be too small or we pointlessly
  * waste randomness entropy, and don't let it be impossibly large.  Using a
  * modulus that is too big causes a LOT more process table scans and slows
  * down fork processing as the pidchecked caching is defeated.
  *
  * A value of 0 disables the random component (fork_findpid() only adds
  * it when randompid is non-zero).  The value is changed through
  * sysctl_kern_randompid(), which performs the sanity checks.
  */
 static int randompid = 0;
 
 /*
  * Sysctl handler for the random pid modulus ('randompid').
  *
  * Reports the current modulus and, when the request carries a new
  * value, stores it after sanitizing: negative values and values above
  * pid_max - 100 become pid_max - 100, values below 2 become 0 and
  * values below 100 become 100.  Everything after wiring the old
  * buffer runs with allproc_lock held exclusively.
  *
  * Returns 0 or the error from sysctl_wire_old_buffer() or
  * sysctl_handle_int().
  */
 static int
 sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
 {
 	int modulus, rv;
 
 	rv = sysctl_wire_old_buffer(req, sizeof(int));
 	if (rv != 0)
 		return (rv);
 
 	sx_xlock(&allproc_lock);
 	modulus = randompid;
 	rv = sysctl_handle_int(oidp, &modulus, 0, req);
 	if (rv != 0 || req->newptr == NULL)
 		goto done;
 
 	if (modulus < 0 || modulus > pid_max - 100) {
 		/* Out of range. */
 		modulus = pid_max - 100;
 	} else if (modulus < 2) {
 		/* NOP. */
 		modulus = 0;
 	} else if (modulus < 100) {
 		/* Make it reasonable. */
 		modulus = 100;
 	}
 	randompid = modulus;
 done:
 	sx_xunlock(&allproc_lock);
 	return (rv);
 }
 
 /*
  * kern.randompid: read/write integer sysctl served by
  * sysctl_kern_randompid().
  */
 SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
     0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
 
 /*
  * Choose the pid for a new process.
  *
  * The search starts at lastpid + 1, raised to at least 10 for
  * RFHIGHPID or otherwise advanced by a random amount below 'randompid'
  * when that is non-zero.  It walks allproc and then zombproc, stepping
  * past every value that is in use as a pid, reaper subtree id, process
  * group id or session id.  While scanning, the static 'pidchecked' is
  * lowered to the smallest in-use id above the candidate, so that later
  * calls whose candidate is below 'pidchecked' can skip the scan.
  *
  * Unless RFHIGHPID is set, lastpid is updated to the returned pid;
  * with RFHIGHPID the 'pidchecked' cache is reset instead.
  *
  * The caller must hold allproc_lock and proctree_lock (asserted).
  */
 static int
 fork_findpid(int flags)
 {
 	struct proc *p;
 	int trypid;
 	static int pidchecked = 0;
 
 	/*
 	 * Requires allproc_lock in order to iterate over the list
 	 * of processes, and proctree_lock to access p_pgrp.
 	 */
 	sx_assert(&allproc_lock, SX_LOCKED);
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	/*
 	 * Find an unused process ID.  We remember a range of unused IDs
 	 * ready to use (from lastpid+1 through pidchecked-1).
 	 *
 	 * If RFHIGHPID is set (used during system boot), do not allocate
 	 * low-numbered pids.
 	 */
 	trypid = lastpid + 1;
 	if (flags & RFHIGHPID) {
 		if (trypid < 10)
 			trypid = 10;
 	} else {
 		if (randompid)
 			trypid += arc4random() % randompid;
 	}
 retry:
 	/*
 	 * If the process ID prototype has wrapped around,
 	 * restart somewhat above 0, as the low-numbered procs
 	 * tend to include daemons that don't exit.
 	 */
 	if (trypid >= pid_max) {
 		trypid = trypid % pid_max;
 		if (trypid < 100)
 			trypid += 100;
 		pidchecked = 0;
 	}
 	if (trypid >= pidchecked) {
 		int doingzomb = 0;
 
 		pidchecked = PID_MAX;
 		/*
 		 * Scan the active and zombie procs to check whether this pid
 		 * is in use.  Remember the lowest pid that's greater
 		 * than trypid, so we can avoid checking for a while.
 		 *
 		 * Avoid reuse of the process group id, session id or
 		 * the reaper subtree id.  Note that for process group
 		 * and sessions, the amount of reserved pids is
 		 * limited by process limit.  For the subtree ids, the
 		 * id is kept reserved only while there is a
 		 * non-reaped process in the subtree, so amount of
 		 * reserved pids is limited by process limit times
 		 * two.
 		 */
 		p = LIST_FIRST(&allproc);
 again:
 		for (; p != NULL; p = LIST_NEXT(p, p_list)) {
 			/*
 			 * Step past ids used by p.  Once the candidate
 			 * reaches pidchecked the cached range is
 			 * exhausted and the whole scan is restarted.
 			 */
 			while (p->p_pid == trypid ||
 			    p->p_reapsubtree == trypid ||
 			    (p->p_pgrp != NULL &&
 			    (p->p_pgrp->pg_id == trypid ||
 			    (p->p_session != NULL &&
 			    p->p_session->s_sid == trypid)))) {
 				trypid++;
 				if (trypid >= pidchecked)
 					goto retry;
 			}
 			/* Lower pidchecked to the nearest id in use above trypid. */
 			if (p->p_pid > trypid && pidchecked > p->p_pid)
 				pidchecked = p->p_pid;
 			if (p->p_pgrp != NULL) {
 				if (p->p_pgrp->pg_id > trypid &&
 				    pidchecked > p->p_pgrp->pg_id)
 					pidchecked = p->p_pgrp->pg_id;
 				if (p->p_session != NULL &&
 				    p->p_session->s_sid > trypid &&
 				    pidchecked > p->p_session->s_sid)
 					pidchecked = p->p_session->s_sid;
 			}
 		}
 		/* Second pass: repeat the same checks over the zombie list. */
 		if (!doingzomb) {
 			doingzomb = 1;
 			p = LIST_FIRST(&zombproc);
 			goto again;
 		}
 	}
 
 	/*
 	 * RFHIGHPID does not mess with the lastpid counter during boot.
 	 */
 	if (flags & RFHIGHPID)
 		pidchecked = 0;
 	else
 		lastpid = trypid;
 
 	return (trypid);
 }
 
 /*
  * Handle a fork request without RFPROC: no process is created, the
  * calling process only gets parts of its own state replaced or
  * unshared.
  *
  * When the process has had threads (and is not a system process) and
  * the file descriptor table is about to be touched (RFCFDG or RFFDG),
  * the work is bracketed by thread_single(SINGLE_BOUNDARY) and
  * thread_single_end().
  *
  * Returns 0, ERESTART when thread_single() fails, or the error from
  * vm_forkproc().
  */
 static int
 fork_norfproc(struct thread *td, int flags)
 {
 	int error;
 	struct proc *p1;
 
 	KASSERT((flags & RFPROC) == 0,
 	    ("fork_norfproc called with RFPROC set"));
 	p1 = td->td_proc;
 
 	if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
 	    (flags & (RFCFDG | RFFDG))) {
 		PROC_LOCK(p1);
 		if (thread_single(p1, SINGLE_BOUNDARY)) {
 			PROC_UNLOCK(p1);
 			return (ERESTART);
 		}
 		PROC_UNLOCK(p1);
 	}
 
 	error = vm_forkproc(td, NULL, NULL, NULL, flags);
 	if (error)
 		goto fail;
 
 	/*
 	 * Close all file descriptors.
 	 */
 	if (flags & RFCFDG) {
 		struct filedesc *fdtmp;
 		fdtmp = fdinit(td->td_proc->p_fd, false);
 		fdescfree(td);
 		p1->p_fd = fdtmp;
 	}
 
 	/*
 	 * Unshare file descriptors (from parent).
 	 */
 	if (flags & RFFDG)
 		fdunshare(td);
 
 	/*
 	 * Reached on success as well as on failure: undo the
 	 * thread_single() above.  The condition is evaluated again
 	 * rather than remembered from the entry check.
 	 */
 fail:
 	if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) &&
 	    (flags & (RFCFDG | RFFDG))) {
 		PROC_LOCK(p1);
 		thread_single_end(p1, SINGLE_BOUNDARY);
 		PROC_UNLOCK(p1);
 	}
 	return (error);
 }
 
 /*
  * Second half of fork1(): fill in the preallocated child process 'p2'
  * (initial thread 'td2', address space 'vm2') from the process of the
  * calling thread 'td', link it into the process lists and, unless
  * RFSTOPPED is set, put its thread on a run queue.
  *
  * Entered with proctree_lock share-locked and allproc_lock exclusively
  * locked (both asserted); both have been released by the time the
  * function returns.  'pdflags' is handed to procdesc_new() when
  * RFPROCDESC is set.
  *
  * If the child was marked TDB_STOPATFORK, the function does not return
  * until that flag has been cleared again.
  */
 static void
 do_fork(struct thread *td, int flags, struct proc *p2, struct thread *td2,
     struct vmspace *vm2, int pdflags)
 {
 	struct proc *p1, *pptr;
 	int p2_held, trypid;
 	struct filedesc *fd;
 	struct filedesc_to_leader *fdtol;
 	struct sigacts *newsigacts;
 
 	sx_assert(&proctree_lock, SX_SLOCKED);
 	sx_assert(&allproc_lock, SX_XLOCKED);
 
 	p2_held = 0;
 	p1 = td->td_proc;
 
 	trypid = fork_findpid(flags);
 
 	sx_sunlock(&proctree_lock);
 
 	/* Publish p2 on allproc and in the pid hash while it is still PRS_NEW. */
 	p2->p_state = PRS_NEW;		/* protect against others */
 	p2->p_pid = trypid;
 	AUDIT_ARG_PID(p2->p_pid);
 	LIST_INSERT_HEAD(&allproc, p2, p_list);
 	allproc_gen++;
 	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
 	tidhash_add(td2);
 	PROC_LOCK(p2);
 	PROC_LOCK(p1);
 
 	sx_xunlock(&allproc_lock);
 
 	bcopy(&p1->p_startcopy, &p2->p_startcopy,
 	    __rangeof(struct proc, p_startcopy, p_endcopy));
 	pargs_hold(p2->p_args);
 
 	PROC_UNLOCK(p1);
 
 	bzero(&p2->p_startzero,
 	    __rangeof(struct proc, p_startzero, p_endzero));
 
 	/* Tell the prison that we exist. */
 	prison_proc_hold(p2->p_ucred->cr_prison);
 
 	PROC_UNLOCK(p2);
 
 	/*
 	 * Malloc things while we don't hold any locks.
 	 */
 	if (flags & RFSIGSHARE)
 		newsigacts = NULL;
 	else
 		newsigacts = sigacts_alloc();
 
 	/*
 	 * Copy filedesc.
 	 */
 	if (flags & RFCFDG) {
 		fd = fdinit(p1->p_fd, false);
 		fdtol = NULL;
 	} else if (flags & RFFDG) {
 		fd = fdcopy(p1->p_fd);
 		fdtol = NULL;
 	} else {
 		fd = fdshare(p1->p_fd);
 		if (p1->p_fdtol == NULL)
 			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
 			    p1->p_leader);
 		if ((flags & RFTHREAD) != 0) {
 			/*
 			 * Shared file descriptor table, and shared
 			 * process leaders.
 			 */
 			fdtol = p1->p_fdtol;
 			FILEDESC_XLOCK(p1->p_fd);
 			fdtol->fdl_refcount++;
 			FILEDESC_XUNLOCK(p1->p_fd);
 		} else {
 			/*
 			 * Shared file descriptor table, and different
 			 * process leaders.
 			 */
 			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
 			    p1->p_fd, p2);
 		}
 	}
 	/*
 	 * Make a proc table entry for the new process.
 	 * Start by zeroing the section of proc that is zero-initialized,
 	 * then copy the section that is copied directly from the parent.
 	 */
 
 	PROC_LOCK(p2);
 	PROC_LOCK(p1);
 
 	bzero(&td2->td_startzero,
 	    __rangeof(struct thread, td_startzero, td_endzero));
 
 	bcopy(&td->td_startcopy, &td2->td_startcopy,
 	    __rangeof(struct thread, td_startcopy, td_endcopy));
 
 	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
 	td2->td_sigstk = td->td_sigstk;
 	td2->td_flags = TDF_INMEM;
 	td2->td_lend_user_pri = PRI_MAX;
 
 #ifdef VIMAGE
 	td2->td_vnet = NULL;
 	td2->td_vnet_lpush = NULL;
 #endif
 
 	/*
 	 * Allow the scheduler to initialize the child.
 	 */
 	thread_lock(td);
 	sched_fork(td, td2);
 	thread_unlock(td);
 
 	/*
 	 * Duplicate sub-structures as needed.
 	 * Increase reference counts on shared objects.
 	 */
 	p2->p_flag = P_INMEM;
 	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC);
 	p2->p_swtick = ticks;
 	if (p1->p_flag & P_PROFIL)
 		startprofclock(p2);
 
 	/*
 	 * Whilst the proc lock is held, copy the VM domain data out
 	 * using the VM domain method.
 	 */
 	vm_domain_policy_init(&p2->p_vm_dom_policy);
 	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
 	    &p1->p_vm_dom_policy);
 
 	if (flags & RFSIGSHARE) {
 		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
 	} else {
 		sigacts_copy(newsigacts, p1->p_sigacts);
 		p2->p_sigacts = newsigacts;
 	}
 
 	/*
 	 * Select p_sigparent: the signal number encoded in the flags
 	 * for RFTSIGZMB, SIGUSR1 for RFLINUXTHPN, SIGCHLD otherwise.
 	 */
 	if (flags & RFTSIGZMB)
 	        p2->p_sigparent = RFTSIGNUM(flags);
 	else if (flags & RFLINUXTHPN)
 	        p2->p_sigparent = SIGUSR1;
 	else
 	        p2->p_sigparent = SIGCHLD;
 
 	p2->p_textvp = p1->p_textvp;
 	p2->p_fd = fd;
 	p2->p_fdtol = fdtol;
 
 	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
 		p2->p_flag |= P_PROTECTED;
 		p2->p_flag2 |= P2_INHERIT_PROTECTED;
 	}
 
 	/*
 	 * p_limit is copy-on-write.  Bump its refcount.
 	 */
 	lim_fork(p1, p2);
 
 	thread_cow_get_proc(td2, p2);
 
 	pstats_fork(p1->p_stats, p2->p_stats);
 
 	PROC_UNLOCK(p1);
 	PROC_UNLOCK(p2);
 
 	/* Bump references to the text vnode (for procfs). */
 	if (p2->p_textvp)
 		vref(p2->p_textvp);
 
 	/*
 	 * Set up linkage for kernel based threading.
 	 */
 	if ((flags & RFTHREAD) != 0) {
 		mtx_lock(&ppeers_lock);
 		p2->p_peers = p1->p_peers;
 		p1->p_peers = p2;
 		p2->p_leader = p1->p_leader;
 		mtx_unlock(&ppeers_lock);
 		PROC_LOCK(p1->p_leader);
 		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
 			PROC_UNLOCK(p1->p_leader);
 			/*
 			 * The task leader is exiting, so process p1 is
 			 * going to be killed shortly.  Since p1 obviously
 			 * isn't dead yet, we know that the leader is either
 			 * sending SIGKILL's to all the processes in this
 			 * task or is sleeping waiting for all the peers to
 			 * exit.  We let p1 complete the fork, but we need
 			 * to go ahead and kill the new process p2 since
 			 * the task leader may not get a chance to send
 			 * SIGKILL to it.  We leave it on the list so that
 			 * the task leader will wait for this new process
 			 * to commit suicide.
 			 */
 			PROC_LOCK(p2);
 			kern_psignal(p2, SIGKILL);
 			PROC_UNLOCK(p2);
 		} else
 			PROC_UNLOCK(p1->p_leader);
 	} else {
 		p2->p_peers = NULL;
 		p2->p_leader = p2;
 	}
 
 	/*
 	 * From here on the process tree is modified, so proctree_lock
 	 * is taken exclusively.
 	 */
 	sx_xlock(&proctree_lock);
 	PGRP_LOCK(p1->p_pgrp);
 	PROC_LOCK(p2);
 	PROC_LOCK(p1);
 
 	/*
 	 * Preserve some more flags in subprocess.  P_PROFIL has already
 	 * been preserved.
 	 */
 	p2->p_flag |= p1->p_flag & P_SUGID;
 	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
 	SESS_LOCK(p1->p_session);
 	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
 		p2->p_flag |= P_CONTROLT;
 	SESS_UNLOCK(p1->p_session);
 	if (flags & RFPPWAIT)
 		p2->p_flag |= P_PPWAIT;
 
 	p2->p_pgrp = p1->p_pgrp;
 	LIST_INSERT_AFTER(p1, p2, p_pglist);
 	PGRP_UNLOCK(p1->p_pgrp);
 	LIST_INIT(&p2->p_children);
 	LIST_INIT(&p2->p_orphans);
 
 	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);
 
 	/*
 	 * If PF_FORK is set, the child process inherits the
 	 * procfs ioctl flags from its parent.
 	 */
 	if (p1->p_pfsflags & PF_FORK) {
 		p2->p_stops = p1->p_stops;
 		p2->p_pfsflags = p1->p_pfsflags;
 	}
 
 	/*
 	 * This begins the section where we must prevent the parent
 	 * from being swapped.
 	 */
 	_PHOLD(p1);
 	PROC_UNLOCK(p1);
 
 	/*
 	 * Attach the new process to its parent.
 	 *
 	 * If RFNOWAIT is set, the newly created process becomes a child
 	 * of the parent's reaper (p1->p_reaper).  This effectively
 	 * disassociates the child from the parent.
 	 */
 	if ((flags & RFNOWAIT) != 0) {
 		pptr = p1->p_reaper;
 		p2->p_reaper = pptr;
 	} else {
 		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
 		    p1 : p1->p_reaper;
 		pptr = p1;
 	}
 	p2->p_pptr = pptr;
 	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
 	LIST_INIT(&p2->p_reaplist);
 	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
 	/* A direct child of its reaper starts a subtree named after itself. */
 	if (p2->p_reaper == p1)
 		p2->p_reapsubtree = p2->p_pid;
 	sx_xunlock(&proctree_lock);
 
 	/* Inform accounting that we have forked. */
 	p2->p_acflag = AFORK;
 	PROC_UNLOCK(p2);
 
 #ifdef KTRACE
 	ktrprocfork(p1, p2);
 #endif
 
 	/*
 	 * Finish creating the child process.  It will return via a different
 	 * execution path later.  (ie: directly into user mode)
 	 */
 	vm_forkproc(td, p2, td2, vm2, flags);
 
 	/*
 	 * Statistics: the exact flag sets used by sys_fork() and
 	 * sys_vfork() are counted as forks and vforks, anything else
 	 * forked by proc0 as a kthread and the rest as rforks.
 	 */
 	if (flags == (RFFDG | RFPROC)) {
 		PCPU_INC(cnt.v_forks);
 		PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize +
 		    p2->p_vmspace->vm_ssize);
 	} else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
 		PCPU_INC(cnt.v_vforks);
 		PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize +
 		    p2->p_vmspace->vm_ssize);
 	} else if (p1 == &proc0) {
 		PCPU_INC(cnt.v_kthreads);
 		PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize +
 		    p2->p_vmspace->vm_ssize);
 	} else {
 		PCPU_INC(cnt.v_rforks);
 		PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize +
 		    p2->p_vmspace->vm_ssize);
 	}
 
 	/*
 	 * Associate the process descriptor with the process before anything
 	 * can happen that might cause that process to need the descriptor.
 	 * However, don't do this until after fork(2) can no longer fail.
 	 */
 	if (flags & RFPROCDESC)
 		procdesc_new(p2, pdflags);
 
 	/*
 	 * Both processes are set up, now check if any loadable modules want
 	 * to adjust anything.
 	 */
 	EVENTHANDLER_INVOKE(process_fork, p1, p2, flags);
 
 	/*
 	 * Set the child start time and mark the process as being complete.
 	 */
 	PROC_LOCK(p2);
 	PROC_LOCK(p1);
 	microuptime(&p2->p_stats->p_start);
 	PROC_SLOCK(p2);
 	p2->p_state = PRS_NORMAL;
 	PROC_SUNLOCK(p2);
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the new process so that any
 	 * tracepoints inherited from the parent can be removed. We have to do
 	 * this only after p_state is PRS_NORMAL since the fasttrap module will
 	 * use pfind() later on.
 	 */
 	if ((flags & RFMEM) == 0 && dtrace_fasttrap_fork)
 		dtrace_fasttrap_fork(p1, p2);
 #endif
 	if ((p1->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED |
 	    P_FOLLOWFORK)) {
 		/*
 		 * Arrange for debugger to receive the fork event.
 		 *
 		 * We can report PL_FLAG_FORKED regardless of
 		 * P_FOLLOWFORK settings, but it does not make sense
 		 * for a runaway child.
 		 *
 		 * The hold taken on p2 here is dropped at the end of
 		 * this function, after the TDB_STOPATFORK wait.
 		 */
 		td->td_dbgflags |= TDB_FORK;
 		td->td_dbg_forked = p2->p_pid;
 		td2->td_dbgflags |= TDB_STOPATFORK;
 		_PHOLD(p2);
 		p2_held = 1;
 	}
 	if (flags & RFPPWAIT) {
 		td->td_pflags |= TDP_RFPPWAIT;
 		td->td_rfppwait_p = p2;
 	}
 	PROC_UNLOCK(p2);
 	if ((flags & RFSTOPPED) == 0) {
 		/*
 		 * If RFSTOPPED not requested, make child runnable and
 		 * add to run queue.
 		 */
 		thread_lock(td2);
 		TD_SET_CAN_RUN(td2);
 		sched_add(td2, SRQ_BORING);
 		thread_unlock(td2);
 	}
 
 	/*
 	 * Now can be swapped.
 	 */
 	_PRELE(p1);
 	PROC_UNLOCK(p1);
 
 	/*
 	 * Tell any interested parties about the new process.
 	 */
 	knote_fork(&p1->p_klist, p2->p_pid);
-	SDT_PROBE3(proc, kernel, , create, p2, p1, flags);
+	SDT_PROBE3(proc, , , create, p2, p1, flags);
 
 	/*
 	 * Wait until debugger is attached to child.
 	 */
 	PROC_LOCK(p2);
 	while ((td2->td_dbgflags & TDB_STOPATFORK) != 0)
 		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
 	if (p2_held)
 		_PRELE(p2);
 	PROC_UNLOCK(p2);
 }
 
 /*
  * Common back end of the fork family of system calls.
  *
  * td        - calling thread; its process is the parent.
  * flags     - RF* flags, validated below.
  * pages     - kernel stack size in pages for the child's first thread;
  *             0 selects kstack_pages.
  * procp     - receives the new process on success, or NULL when RFPROC
  *             is not set.  It is not written on the other failure
  *             paths.
  * procdescp - receives the process descriptor; required when
  *             RFPROCDESC is set.
  * pdflags   - handed on to do_fork() (used for procdesc_new()).
  * fcaps     - handed to falloc_caps() for the process descriptor.
  *
  * Without RFPROC no process is created and the request is forwarded to
  * fork_norfproc().
  *
  * Returns 0 on success; EINVAL for invalid flag combinations; EAGAIN
  * when a process limit is hit or racct_proc_fork() fails; ENOMEM when
  * the thread, its stack, the address space copy or the swap
  * reservation cannot be obtained; or the error from falloc_caps() or
  * fork_norfproc().  Every failure after the nprocs increment also
  * delays the caller with pause("fork", hz / 2).
  */
 int
 fork1(struct thread *td, int flags, int pages, struct proc **procp,
     int *procdescp, int pdflags, struct filecaps *fcaps)
 {
 	struct proc *p1, *newproc;
 	struct thread *td2;
 	struct vmspace *vm2;
 	struct file *fp_procdesc;
 	vm_ooffset_t mem_charged;
 	int error, nprocs_new, ok;
 	static int curfail;
 	static struct timeval lastfail;
 
 	/* Check for the undefined or unimplemented flags. */
 	if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
 		return (EINVAL);
 
 	/* Signal value requires RFTSIGZMB. */
 	if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0)
 		return (EINVAL);
 
 	/* Can't copy and clear. */
 	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
 		return (EINVAL);
 
 	/* Check the validity of the signal number. */
 	if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG)
 		return (EINVAL);
 
 	if ((flags & RFPROCDESC) != 0) {
 		/* Can't not create a process yet get a process descriptor. */
 		if ((flags & RFPROC) == 0)
 			return (EINVAL);
 
 		/* Must provide a place to put a procdesc if creating one. */
 		if (procdescp == NULL)
 			return (EINVAL);
 	}
 
 	p1 = td->td_proc;
 
 	/*
 	 * Here we don't create a new process, but we divorce
 	 * certain parts of a process from itself.
 	 */
 	if ((flags & RFPROC) == 0) {
 		*procp = NULL;
 		return (fork_norfproc(td, flags));
 	}
 
 	/* Initialized up front because the fail2 path inspects them. */
 	fp_procdesc = NULL;
 	newproc = NULL;
 	vm2 = NULL;
 
 	/*
 	 * Increment the nprocs resource before allocations occur.
 	 * Although process entries are dynamically created, we still
 	 * keep a global limit on the maximum number we will
 	 * create. There are hard-limits as to the number of processes
 	 * that can run, established by the KVA and memory usage for
 	 * the process data.
 	 *
 	 * Don't allow a nonprivileged user to use the last ten
 	 * processes; don't let root exceed the limit.
 	 */
 	nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1;
 	if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred,
 	    PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) {
 		error = EAGAIN;
 		/* The message is rate limited through ppsratecheck(). */
 		sx_xlock(&allproc_lock);
 		if (ppsratecheck(&lastfail, &curfail, 1)) {
 			printf("maxproc limit exceeded by uid %u (pid %d); "
 			    "see tuning(7) and login.conf(5)\n",
 			    td->td_ucred->cr_ruid, p1->p_pid);
 		}
 		sx_xunlock(&allproc_lock);
 		goto fail2;
 	}
 
 	/*
 	 * If required, create a process descriptor in the parent first; we
 	 * will abandon it if something goes wrong. We don't finit() until
 	 * later.
 	 */
 	if (flags & RFPROCDESC) {
 		error = falloc_caps(td, &fp_procdesc, procdescp, 0, fcaps);
 		if (error != 0)
 			goto fail2;
 	}
 
 	mem_charged = 0;
 	if (pages == 0)
 		pages = kstack_pages;
 	/* Allocate new proc. */
 	newproc = uma_zalloc(proc_zone, M_WAITOK);
 	/*
 	 * The proc returned by the zone may still have a thread linked
 	 * to it.  If so it is reused, and its kernel stack is replaced
 	 * when the stack is missing or has the wrong size.
 	 */
 	td2 = FIRST_THREAD_IN_PROC(newproc);
 	if (td2 == NULL) {
 		td2 = thread_alloc(pages);
 		if (td2 == NULL) {
 			error = ENOMEM;
 			goto fail2;
 		}
 		proc_linkup(newproc, td2);
 	} else {
 		if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
 			if (td2->td_kstack != 0)
 				vm_thread_dispose(td2);
 			if (!thread_alloc_stack(td2, pages)) {
 				error = ENOMEM;
 				goto fail2;
 			}
 		}
 	}
 
 	/* Without RFMEM the child gets its own copy of the address space. */
 	if ((flags & RFMEM) == 0) {
 		vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
 		if (vm2 == NULL) {
 			error = ENOMEM;
 			goto fail2;
 		}
 		if (!swap_reserve(mem_charged)) {
 			/*
 			 * The swap reservation failed. The accounting
 			 * from the entries of the copied vm2 will be
 			 * subtracted in vmspace_free(), so force the
 			 * reservation there.
 			 */
 			swap_reserve_force(mem_charged);
 			error = ENOMEM;
 			goto fail2;
 		}
 	} else
 		vm2 = NULL;
 
 	/*
 	 * XXX: This is ugly; when we copy resource usage, we need to bump
 	 *      per-cred resource counters.
 	 */
 	proc_set_cred_init(newproc, crhold(td->td_ucred));
 
 	/*
 	 * Initialize resource accounting for the child process.
 	 */
 	error = racct_proc_fork(p1, newproc);
 	if (error != 0) {
 		error = EAGAIN;
 		goto fail1;
 	}
 
 #ifdef MAC
 	mac_proc_init(newproc);
 #endif
 	knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
 	STAILQ_INIT(&newproc->p_ktr);
 
 	/* We have to lock the process tree while we look for a pid. */
 	sx_slock(&proctree_lock);
 	sx_xlock(&allproc_lock);
 
 	/*
 	 * Increment the count of procs running with this uid. Don't allow
 	 * a nonprivileged user to exceed their current limit.
 	 *
 	 * XXXRW: Can we avoid privilege here if it's not needed?
 	 */
 	error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
 	if (error == 0)
 		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
 	else {
 		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
 		    lim_cur(td, RLIMIT_NPROC));
 	}
 	if (ok) {
 		/* do_fork() releases proctree_lock and allproc_lock. */
 		do_fork(td, flags, newproc, td2, vm2, pdflags);
 
 		/*
 		 * Return child proc pointer to parent.
 		 */
 		*procp = newproc;
 		if (flags & RFPROCDESC) {
 			procdesc_finit(newproc->p_procdesc, fp_procdesc);
 			fdrop(fp_procdesc, td);
 		}
 		racct_proc_fork_done(newproc);
 		return (0);
 	}
 
 	/*
 	 * Failure paths, entered at the label matching how far the
 	 * setup got: the code right below undoes MAC and racct state,
 	 * fail1 drops the credential reference, and fail2 releases
 	 * everything else and gives the nprocs slot back.
 	 * NOTE(review): fail2 can be reached with newproc still NULL,
 	 * which relies on uma_zfree() accepting a NULL item - confirm.
 	 */
 	error = EAGAIN;
 	sx_sunlock(&proctree_lock);
 	sx_xunlock(&allproc_lock);
 #ifdef MAC
 	mac_proc_destroy(newproc);
 #endif
 	racct_proc_exit(newproc);
 fail1:
 	crfree(newproc->p_ucred);
 	newproc->p_ucred = NULL;
 fail2:
 	if (vm2 != NULL)
 		vmspace_free(vm2);
 	uma_zfree(proc_zone, newproc);
 	if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) {
 		fdclose(td, fp_procdesc, *procdescp);
 		fdrop(fp_procdesc, td);
 	}
 	atomic_add_int(&nprocs, -1);
 	pause("fork", hz / 2);
 	return (error);
 }
 
 /*
  * Handle the return of a child process from fork1().  This function
  * is called from the MD fork_trampoline() entry point.
  *
  * callout - function run on behalf of the new thread; must not be NULL
  *           (asserted).
  * arg     - first argument handed to 'callout'.
  * frame   - trap frame handed to 'callout' as its second argument.
  *
  * If 'callout' returns in a process flagged P_KTHREAD, a diagnostic is
  * printed and kproc_exit(0) is called.
  */
 void
 fork_exit(void (*callout)(void *, struct trapframe *), void *arg,
     struct trapframe *frame)
 {
 	struct proc *p;
 	struct thread *td;
 	struct thread *dtd;
 
 	td = curthread;
 	p = td->td_proc;
 	KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new"));
 
 	CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)",
 		td, td->td_sched, p->p_pid, td->td_name);
 
 	sched_fork_exit(td);
 	/*
 	 * Processes normally resume in mi_switch() after being
 	 * cpu_switch()'ed to, but when children start up they arrive here
 	 * instead, so we must do much the same things as mi_switch() would.
 	 */
 	if ((dtd = PCPU_GET(deadthread))) {
 		PCPU_SET(deadthread, NULL);
 		thread_stash(dtd);
 	}
 	thread_unlock(td);
 
 	/*
 	 * cpu_set_fork_handler intercepts this function call to
 	 * have this call a non-return function to stay in kernel mode.
 	 * initproc has its own fork handler, but it does return.
 	 */
 	KASSERT(callout != NULL, ("NULL callout in fork_exit"));
 	callout(arg, frame);
 
 	/*
 	 * Check if a kernel thread misbehaved and returned from its main
 	 * function.
 	 */
 	if (p->p_flag & P_KTHREAD) {
 		printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
 		    td->td_name, p->p_pid);
 		kproc_exit(0);
 	}
 	mtx_assert(&Giant, MA_NOTOWNED);
 
 	/* Give the ABI (sysentvec) its optional schedtail hook. */
 	if (p->p_sysent->sv_schedtail != NULL)
 		(p->p_sysent->sv_schedtail)(td);
 	/* The thread is no longer in the middle of a fork. */
 	td->td_pflags &= ~TDP_FORKING;
 }
 
 /*
  * Simplified back end of syscall(), used when returning from fork()
  * directly into user mode.  Giant is not held on entry, and must not
  * be held on return.  This function is passed in to fork_exit() as the
  * first parameter and is called when returning to a new userland process.
  *
  * Before returning to user mode it handles two debugger cases: a child
  * marked TDB_STOPATFORK by do_fork(), and a new thread starting in a
  * process that is already traced.
  */
 void
 fork_return(struct thread *td, struct trapframe *frame)
 {
 	struct proc *p, *dbg;
 
 	p = td->td_proc;
 	if (td->td_dbgflags & TDB_STOPATFORK) {
 		sx_xlock(&proctree_lock);
 		PROC_LOCK(p);
 		if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) ==
 		    (P_TRACED | P_FOLLOWFORK)) {
 			/*
 			 * If debugger still wants auto-attach for the
 			 * parent's children, do it now.
 			 *
 			 * The debugger is taken to be the parent's
 			 * current parent.  The real parent's pid is
 			 * saved in p_oppid before this process is
 			 * reparented to the debugger.
 			 */
 			dbg = p->p_pptr->p_pptr;
 			p->p_flag |= P_TRACED;
 			p->p_oppid = p->p_pptr->p_pid;
 			CTR2(KTR_PTRACE,
 		    "fork_return: attaching to new child pid %d: oppid %d",
 			    p->p_pid, p->p_oppid);
 			proc_reparent(p, dbg);
 			sx_xunlock(&proctree_lock);
 			td->td_dbgflags |= TDB_CHILD | TDB_SCX;
 			ptracestop(td, SIGSTOP);
 			td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX);
 		} else {
 			/*
 			 * ... otherwise clear the request.
 			 *
 			 * The broadcast wakes up do_fork(), which waits
 			 * on p_dbgwait for TDB_STOPATFORK to clear.
 			 */
 			sx_xunlock(&proctree_lock);
 			td->td_dbgflags &= ~TDB_STOPATFORK;
 			cv_broadcast(&p->p_dbgwait);
 		}
 		PROC_UNLOCK(p);
 	} else if (p->p_flag & P_TRACED) {
 		/*
 		 * This is the start of a new thread in a traced
 		 * process.  Report a system call exit event.
 		 */
 		PROC_LOCK(p);
 		td->td_dbgflags |= TDB_SCX;
 		_STOPEVENT(p, S_SCX, td->td_dbg_sc_code);
 		if ((p->p_stops & S_PT_SCX) != 0)
 			ptracestop(td, SIGTRAP);
 		td->td_dbgflags &= ~TDB_SCX;
 		PROC_UNLOCK(p);
 	}
 
 	userret(td, frame);
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_SYSRET))
 		ktrsysret(SYS_fork, 0, 0);
 #endif
 }
Index: head/sys/kern/kern_proc.c
===================================================================
--- head/sys/kern/kern_proc.c	(revision 292383)
+++ head/sys/kern/kern_proc.c	(revision 292384)
@@ -1,3056 +1,3053 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_ddb.h"
 #include "opt_ktrace.h"
 #include "opt_kstack_pages.h"
 #include "opt_stack.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/elf.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/refcount.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sysent.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/stack.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/filedesc.h>
 #include <sys/tty.h>
 #include <sys/signalvar.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 SDT_PROVIDER_DEFINE(proc);
-SDT_PROBE_DEFINE4(proc, kernel, ctor, entry, "struct proc *", "int",
-    "void *", "int");
-SDT_PROBE_DEFINE4(proc, kernel, ctor, return, "struct proc *", "int",
-    "void *", "int");
-SDT_PROBE_DEFINE4(proc, kernel, dtor, entry, "struct proc *", "int",
-    "void *", "struct thread *");
-SDT_PROBE_DEFINE3(proc, kernel, dtor, return, "struct proc *", "int",
-    "void *");
-SDT_PROBE_DEFINE3(proc, kernel, init, entry, "struct proc *", "int",
+SDT_PROBE_DEFINE4(proc, , ctor, entry, "struct proc *", "int", "void *",
     "int");
-SDT_PROBE_DEFINE3(proc, kernel, init, return, "struct proc *", "int",
+SDT_PROBE_DEFINE4(proc, , ctor, return, "struct proc *", "int", "void *",
     "int");
+SDT_PROBE_DEFINE4(proc, , dtor, entry, "struct proc *", "int", "void *",
+    "struct thread *");
+SDT_PROBE_DEFINE3(proc, , dtor, return, "struct proc *", "int", "void *");
+SDT_PROBE_DEFINE3(proc, , init, entry, "struct proc *", "int", "int");
+SDT_PROBE_DEFINE3(proc, , init, return, "struct proc *", "int", "int");
 
 MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
 MALLOC_DEFINE(M_SESSION, "session", "session header");
 static MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
 MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
 
 static void doenterpgrp(struct proc *, struct pgrp *);
 static void orphanpg(struct pgrp *pg);
 static void fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp);
 static void fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp);
 static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
     int preferthread);
 static void pgadjustjobc(struct pgrp *pgrp, int entering);
 static void pgdelete(struct pgrp *);
 static int proc_ctor(void *mem, int size, void *arg, int flags);
 static void proc_dtor(void *mem, int size, void *arg);
 static int proc_init(void *mem, int size, int flags);
 static void proc_fini(void *mem, int size);
 static void pargs_free(struct pargs *pa);
 static struct proc *zpfind_locked(pid_t pid);
 
 /*
  * Other process lists
  */
 struct pidhashhead *pidhashtbl;
 u_long pidhash;
 struct pgrphashhead *pgrphashtbl;
 u_long pgrphash;
 struct proclist allproc;
 struct proclist zombproc;
 struct sx allproc_lock;
 struct sx proctree_lock;
 struct mtx ppeers_lock;
 uma_zone_t proc_zone;
 
 /*
  * The offset of various fields in struct proc and struct thread.
  * These are used by kernel debuggers to enumerate kernel threads and
  * processes.
  */
 const int proc_off_p_pid = offsetof(struct proc, p_pid);
 const int proc_off_p_comm = offsetof(struct proc, p_comm);
 const int proc_off_p_list = offsetof(struct proc, p_list);
 const int proc_off_p_threads = offsetof(struct proc, p_threads);
 const int thread_off_td_tid = offsetof(struct thread, td_tid);
 const int thread_off_td_name = offsetof(struct thread, td_name);
 const int thread_off_td_oncpu = offsetof(struct thread, td_oncpu);
 const int thread_off_td_pcb = offsetof(struct thread, td_pcb);
 const int thread_off_td_plist = offsetof(struct thread, td_plist);
 
 int kstack_pages = KSTACK_PAGES;
 SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0,
     "Kernel stack size in pages");
 static int vmmap_skip_res_cnt = 0;
 SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW,
     &vmmap_skip_res_cnt, 0,
     "Skip calculation of the pages resident count in kern.proc.vmmap");
 
 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
 #ifdef COMPAT_FREEBSD32
 CTASSERT(sizeof(struct kinfo_proc32) == KINFO_PROC32_SIZE);
 #endif
 
 /*
  * Initialize global process hashing structures.
  */
 void
 procinit()
 {
 
 	sx_init(&allproc_lock, "allproc");
 	sx_init(&proctree_lock, "proctree");
 	mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
 	LIST_INIT(&allproc);
 	LIST_INIT(&zombproc);
 	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
 	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uihashinit();
 }
 
 /*
  * Prepare a proc for use.
  */
 static int
 proc_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct proc *p;
 
 	p = (struct proc *)mem;
-	SDT_PROBE4(proc, kernel, ctor , entry, p, size, arg, flags);
+	SDT_PROBE4(proc, , ctor , entry, p, size, arg, flags);
 	EVENTHANDLER_INVOKE(process_ctor, p);
-	SDT_PROBE4(proc, kernel, ctor , return, p, size, arg, flags);
+	SDT_PROBE4(proc, , ctor , return, p, size, arg, flags);
 	return (0);
 }
 
 /*
  * Reclaim a proc after use.
  */
 static void
 proc_dtor(void *mem, int size, void *arg)
 {
 	struct proc *p;
 	struct thread *td;
 
 	/* INVARIANTS checks go here */
 	p = (struct proc *)mem;
 	td = FIRST_THREAD_IN_PROC(p);
-	SDT_PROBE4(proc, kernel, dtor, entry, p, size, arg, td);
+	SDT_PROBE4(proc, , dtor, entry, p, size, arg, td);
 	if (td != NULL) {
 #ifdef INVARIANTS
 		KASSERT((p->p_numthreads == 1),
 		    ("bad number of threads in exiting process"));
 		KASSERT(STAILQ_EMPTY(&p->p_ktr), ("proc_dtor: non-empty p_ktr"));
 #endif
 		/* Free all OSD associated to this thread. */
 		osd_thread_exit(td);
 	}
 	EVENTHANDLER_INVOKE(process_dtor, p);
 	if (p->p_ksi != NULL)
 		KASSERT(! KSI_ONQ(p->p_ksi), ("SIGCHLD queue"));
-	SDT_PROBE3(proc, kernel, dtor, return, p, size, arg);
+	SDT_PROBE3(proc, , dtor, return, p, size, arg);
 }
 
 /*
  * Initialize type-stable parts of a proc (when newly created).
  */
 static int
 proc_init(void *mem, int size, int flags)
 {
 	struct proc *p;
 
 	p = (struct proc *)mem;
-	SDT_PROBE3(proc, kernel, init, entry, p, size, flags);
+	SDT_PROBE3(proc, , init, entry, p, size, flags);
 	p->p_sched = (struct p_sched *)&p[1];
 	mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW);
 	mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&p->p_statmtx, "pstatl", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&p->p_itimmtx, "pitiml", NULL, MTX_SPIN | MTX_NEW);
 	mtx_init(&p->p_profmtx, "pprofl", NULL, MTX_SPIN | MTX_NEW);
 	cv_init(&p->p_pwait, "ppwait");
 	cv_init(&p->p_dbgwait, "dbgwait");
 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
 	EVENTHANDLER_INVOKE(process_init, p);
 	p->p_stats = pstats_alloc();
-	SDT_PROBE3(proc, kernel, init, return, p, size, flags);
+	SDT_PROBE3(proc, , init, return, p, size, flags);
 	return (0);
 }
 
 /*
  * UMA should ensure that this function is never called.
  * Freeing a proc structure would violate type stability.
  */
 static void
 proc_fini(void *mem, int size)
 {
 #ifdef notnow
 	struct proc *p;
 
 	p = (struct proc *)mem;
 	EVENTHANDLER_INVOKE(process_fini, p);
 	pstats_free(p->p_stats);
 	thread_free(FIRST_THREAD_IN_PROC(p));
 	mtx_destroy(&p->p_mtx);
 	if (p->p_ksi != NULL)
 		ksiginfo_free(p->p_ksi);
 #else
 	panic("proc reclaimed");
 #endif
 }
 
 /*
  * Is p an inferior of the current process?
  */
 int
 inferior(struct proc *p)
 {
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	for (; p != curproc; p = proc_realparent(p)) {
 		if (p->p_pid == 0)
 			return (0);
 	}
 	return (1);
 }
 
 struct proc *
 pfind_locked(pid_t pid)
 {
 	struct proc *p;
 
 	sx_assert(&allproc_lock, SX_LOCKED);
 	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
 		if (p->p_pid == pid) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NEW) {
 				PROC_UNLOCK(p);
 				p = NULL;
 			}
 			break;
 		}
 	}
 	return (p);
 }
 
 /*
  * Locate a process by number; return only "live" processes -- i.e., neither
  * zombies nor newly born but incompletely initialized processes.  By not
  * returning processes in the PRS_NEW state, we allow callers to avoid
  * testing for that condition to avoid dereferencing p_ucred, et al.
  */
 struct proc *
 pfind(pid_t pid)
 {
 	struct proc *p;
 
 	sx_slock(&allproc_lock);
 	p = pfind_locked(pid);
 	sx_sunlock(&allproc_lock);
 	return (p);
 }
 
 static struct proc *
 pfind_tid_locked(pid_t tid)
 {
 	struct proc *p;
 	struct thread *td;
 
 	sx_assert(&allproc_lock, SX_LOCKED);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		if (p->p_state == PRS_NEW) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		FOREACH_THREAD_IN_PROC(p, td) {
 			if (td->td_tid == tid)
 				goto found;
 		}
 		PROC_UNLOCK(p);
 	}
 found:
 	return (p);
 }
 
 /*
  * Locate a process group by number.
  * The caller must hold proctree_lock.
  */
 struct pgrp *
 pgfind(pgid)
 	register pid_t pgid;
 {
 	register struct pgrp *pgrp;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
 		if (pgrp->pg_id == pgid) {
 			PGRP_LOCK(pgrp);
 			return (pgrp);
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Locate process and do additional manipulations, depending on flags.
  */
 int
 pget(pid_t pid, int flags, struct proc **pp)
 {
 	struct proc *p;
 	int error;
 
 	sx_slock(&allproc_lock);
 	if (pid <= PID_MAX) {
 		p = pfind_locked(pid);
 		if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
 			p = zpfind_locked(pid);
 	} else if ((flags & PGET_NOTID) == 0) {
 		p = pfind_tid_locked(pid);
 	} else {
 		p = NULL;
 	}
 	sx_sunlock(&allproc_lock);
 	if (p == NULL)
 		return (ESRCH);
 	if ((flags & PGET_CANSEE) != 0) {
 		error = p_cansee(curthread, p);
 		if (error != 0)
 			goto errout;
 	}
 	if ((flags & PGET_CANDEBUG) != 0) {
 		error = p_candebug(curthread, p);
 		if (error != 0)
 			goto errout;
 	}
 	if ((flags & PGET_ISCURRENT) != 0 && curproc != p) {
 		error = EPERM;
 		goto errout;
 	}
 	if ((flags & PGET_NOTWEXIT) != 0 && (p->p_flag & P_WEXIT) != 0) {
 		error = ESRCH;
 		goto errout;
 	}
 	if ((flags & PGET_NOTINEXEC) != 0 && (p->p_flag & P_INEXEC) != 0) {
 		/*
 		 * XXXRW: Not clear ESRCH is the right error during proc
 		 * execve().
 		 */
 		error = ESRCH;
 		goto errout;
 	}
 	if ((flags & PGET_HOLD) != 0) {
 		_PHOLD(p);
 		PROC_UNLOCK(p);
 	}
 	*pp = p;
 	return (0);
 errout:
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Create a new process group.
  * pgid must be equal to the pid of p.
  * Begin a new session if required.
  */
 int
 enterpgrp(p, pgid, pgrp, sess)
 	register struct proc *p;
 	pid_t pgid;
 	struct pgrp *pgrp;
 	struct session *sess;
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 
 	KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
 	KASSERT(p->p_pid == pgid,
 	    ("enterpgrp: new pgrp and pid != pgid"));
 	KASSERT(pgfind(pgid) == NULL,
 	    ("enterpgrp: pgrp with pgid exists"));
 	KASSERT(!SESS_LEADER(p),
 	    ("enterpgrp: session leader attempted setpgrp"));
 
 	mtx_init(&pgrp->pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
 
 	if (sess != NULL) {
 		/*
 		 * new session
 		 */
 		mtx_init(&sess->s_mtx, "session", NULL, MTX_DEF);
 		PROC_LOCK(p);
 		p->p_flag &= ~P_CONTROLT;
 		PROC_UNLOCK(p);
 		PGRP_LOCK(pgrp);
 		sess->s_leader = p;
 		sess->s_sid = p->p_pid;
 		refcount_init(&sess->s_count, 1);
 		sess->s_ttyvp = NULL;
 		sess->s_ttydp = NULL;
 		sess->s_ttyp = NULL;
 		bcopy(p->p_session->s_login, sess->s_login,
 			    sizeof(sess->s_login));
 		pgrp->pg_session = sess;
 		KASSERT(p == curproc,
 		    ("enterpgrp: mksession and p != curproc"));
 	} else {
 		pgrp->pg_session = p->p_session;
 		sess_hold(pgrp->pg_session);
 		PGRP_LOCK(pgrp);
 	}
 	pgrp->pg_id = pgid;
 	LIST_INIT(&pgrp->pg_members);
 
 	/*
 	 * As we have an exclusive lock of proctree_lock,
 	 * this should not deadlock.
 	 */
 	LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
 	pgrp->pg_jobc = 0;
 	SLIST_INIT(&pgrp->pg_sigiolst);
 	PGRP_UNLOCK(pgrp);
 
 	doenterpgrp(p, pgrp);
 
 	return (0);
 }
 
 /*
  * Move p to an existing process group
  */
 int
 enterthispgrp(p, pgrp)
 	register struct proc *p;
 	struct pgrp *pgrp;
 {
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
 	KASSERT(pgrp->pg_session == p->p_session,
 		("%s: pgrp's session %p, p->p_session %p.\n",
 		__func__,
 		pgrp->pg_session,
 		p->p_session));
 	KASSERT(pgrp != p->p_pgrp,
 		("%s: p belongs to pgrp.", __func__));
 
 	doenterpgrp(p, pgrp);
 
 	return (0);
 }
 
 /*
  * Move p to a process group
  */
 static void
 doenterpgrp(p, pgrp)
 	struct proc *p;
 	struct pgrp *pgrp;
 {
 	struct pgrp *savepgrp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(p->p_session, MA_NOTOWNED);
 
 	savepgrp = p->p_pgrp;
 
 	/*
 	 * Adjust eligibility of affected pgrps to participate in job control.
 	 * Increment eligibility counts before decrementing, otherwise we
 	 * could reach 0 spuriously during the first call.
 	 */
 	fixjobc(p, pgrp, 1);
 	fixjobc(p, p->p_pgrp, 0);
 
 	PGRP_LOCK(pgrp);
 	PGRP_LOCK(savepgrp);
 	PROC_LOCK(p);
 	LIST_REMOVE(p, p_pglist);
 	p->p_pgrp = pgrp;
 	PROC_UNLOCK(p);
 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
 	PGRP_UNLOCK(savepgrp);
 	PGRP_UNLOCK(pgrp);
 	if (LIST_EMPTY(&savepgrp->pg_members))
 		pgdelete(savepgrp);
 }
 
 /*
  * remove process from process group
  */
 int
 leavepgrp(p)
 	register struct proc *p;
 {
 	struct pgrp *savepgrp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	savepgrp = p->p_pgrp;
 	PGRP_LOCK(savepgrp);
 	PROC_LOCK(p);
 	LIST_REMOVE(p, p_pglist);
 	p->p_pgrp = NULL;
 	PROC_UNLOCK(p);
 	PGRP_UNLOCK(savepgrp);
 	if (LIST_EMPTY(&savepgrp->pg_members))
 		pgdelete(savepgrp);
 	return (0);
 }
 
 /*
  * delete a process group
  */
 static void
 pgdelete(pgrp)
 	register struct pgrp *pgrp;
 {
 	struct session *savesess;
 	struct tty *tp;
 
 	sx_assert(&proctree_lock, SX_XLOCKED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
 
 	/*
 	 * Reset any sigio structures pointing to us as a result of
 	 * F_SETOWN with our pgid.
 	 */
 	funsetownlst(&pgrp->pg_sigiolst);
 
 	PGRP_LOCK(pgrp);
 	tp = pgrp->pg_session->s_ttyp;
 	LIST_REMOVE(pgrp, pg_hash);
 	savesess = pgrp->pg_session;
 	PGRP_UNLOCK(pgrp);
 
 	/* Remove the reference to the pgrp before deallocating it. */
 	if (tp != NULL) {
 		tty_lock(tp);
 		tty_rel_pgrp(tp, pgrp);
 	}
 
 	mtx_destroy(&pgrp->pg_mtx);
 	free(pgrp, M_PGRP);
 	sess_release(savesess);
 }
 
 static void
 pgadjustjobc(pgrp, entering)
 	struct pgrp *pgrp;
 	int entering;
 {
 
 	PGRP_LOCK(pgrp);
 	if (entering)
 		pgrp->pg_jobc++;
 	else {
 		--pgrp->pg_jobc;
 		if (pgrp->pg_jobc == 0)
 			orphanpg(pgrp);
 	}
 	PGRP_UNLOCK(pgrp);
 }
 
 /*
  * Adjust pgrp jobc counters when specified process changes process group.
  * We count the number of processes in each process group that "qualify"
  * the group for terminal job control (those with a parent in a different
  * process group of the same session).  If that count reaches zero, the
  * process group becomes orphaned.  Check both the specified process'
  * process group and that of its children.
  * entering == 0 => p is leaving specified group.
  * entering == 1 => p is entering specified group.
  */
 void
 fixjobc(p, pgrp, entering)
 	register struct proc *p;
 	register struct pgrp *pgrp;
 	int entering;
 {
 	register struct pgrp *hispgrp;
 	register struct session *mysession;
 
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 	PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
 	SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
 
 	/*
 	 * Check p's parent to see whether p qualifies its own process
 	 * group; if so, adjust count for p's process group.
 	 */
 	mysession = pgrp->pg_session;
 	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
 	    hispgrp->pg_session == mysession)
 		pgadjustjobc(pgrp, entering);
 
 	/*
 	 * Check this process' children to see whether they qualify
 	 * their process groups; if so, adjust counts for children's
 	 * process groups.
 	 */
 	LIST_FOREACH(p, &p->p_children, p_sibling) {
 		hispgrp = p->p_pgrp;
 		if (hispgrp == pgrp ||
 		    hispgrp->pg_session != mysession)
 			continue;
 		PROC_LOCK(p);
 		if (p->p_state == PRS_ZOMBIE) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		PROC_UNLOCK(p);
 		pgadjustjobc(hispgrp, entering);
 	}
 }
 
 /*
  * A process group has become orphaned;
  * if there are any stopped processes in the group,
  * hang-up all process in that group.
  */
 static void
 orphanpg(pg)
 	struct pgrp *pg;
 {
 	register struct proc *p;
 
 	PGRP_LOCK_ASSERT(pg, MA_OWNED);
 
 	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 		PROC_LOCK(p);
 		if (P_SHOULDSTOP(p) == P_STOPPED_SIG) {
 			PROC_UNLOCK(p);
 			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 				PROC_LOCK(p);
 				kern_psignal(p, SIGHUP);
 				kern_psignal(p, SIGCONT);
 				PROC_UNLOCK(p);
 			}
 			return;
 		}
 		PROC_UNLOCK(p);
 	}
 }
 
 void
 sess_hold(struct session *s)
 {
 
 	refcount_acquire(&s->s_count);
 }
 
 void
 sess_release(struct session *s)
 {
 
 	if (refcount_release(&s->s_count)) {
 		if (s->s_ttyp != NULL) {
 			tty_lock(s->s_ttyp);
 			tty_rel_sess(s->s_ttyp, s);
 		}
 		mtx_destroy(&s->s_mtx);
 		free(s, M_SESSION);
 	}
 }
 
 #ifdef DDB
 
 DB_SHOW_COMMAND(pgrpdump, pgrpdump)
 {
 	register struct pgrp *pgrp;
 	register struct proc *p;
 	register int i;
 
 	for (i = 0; i <= pgrphash; i++) {
 		if (!LIST_EMPTY(&pgrphashtbl[i])) {
 			printf("\tindx %d\n", i);
 			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
 				printf(
 			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
 				    (void *)pgrp, (long)pgrp->pg_id,
 				    (void *)pgrp->pg_session,
 				    pgrp->pg_session->s_count,
 				    (void *)LIST_FIRST(&pgrp->pg_members));
 				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 					printf("\t\tpid %ld addr %p pgrp %p\n", 
 					    (long)p->p_pid, (void *)p,
 					    (void *)p->p_pgrp);
 				}
 			}
 		}
 	}
 }
 #endif /* DDB */
 
 /*
  * Calculate the kinfo_proc members which contain process-wide
  * informations.
  * Must be called with the target process locked.
  */
 static void
 fill_kinfo_aggregate(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *td;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	kp->ki_estcpu = 0;
 	kp->ki_pctcpu = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		kp->ki_pctcpu += sched_pctcpu(td);
 		kp->ki_estcpu += td->td_estcpu;
 		thread_unlock(td);
 	}
 }
 
 /*
  * Clear kinfo_proc and fill in any information that is common
  * to all threads in the process.
  * Must be called with the target process locked.
  */
 static void
 fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 {
 	struct thread *td0;
 	struct tty *tp;
 	struct session *sp;
 	struct ucred *cred;
 	struct sigacts *ps;
 
 	/* For proc_realparent. */
 	sx_assert(&proctree_lock, SX_LOCKED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	bzero(kp, sizeof(*kp));
 
 	kp->ki_structsize = sizeof(*kp);
 	kp->ki_paddr = p;
 	kp->ki_addr =/* p->p_addr; */0; /* XXX */
 	kp->ki_args = p->p_args;
 	kp->ki_textvp = p->p_textvp;
 #ifdef KTRACE
 	kp->ki_tracep = p->p_tracevp;
 	kp->ki_traceflag = p->p_traceflag;
 #endif
 	kp->ki_fd = p->p_fd;
 	kp->ki_vmspace = p->p_vmspace;
 	kp->ki_flag = p->p_flag;
 	kp->ki_flag2 = p->p_flag2;
 	cred = p->p_ucred;
 	if (cred) {
 		kp->ki_uid = cred->cr_uid;
 		kp->ki_ruid = cred->cr_ruid;
 		kp->ki_svuid = cred->cr_svuid;
 		kp->ki_cr_flags = 0;
 		if (cred->cr_flags & CRED_FLAG_CAPMODE)
 			kp->ki_cr_flags |= KI_CRF_CAPABILITY_MODE;
 		/* XXX bde doesn't like KI_NGROUPS */
 		if (cred->cr_ngroups > KI_NGROUPS) {
 			kp->ki_ngroups = KI_NGROUPS;
 			kp->ki_cr_flags |= KI_CRF_GRP_OVERFLOW;
 		} else
 			kp->ki_ngroups = cred->cr_ngroups;
 		bcopy(cred->cr_groups, kp->ki_groups,
 		    kp->ki_ngroups * sizeof(gid_t));
 		kp->ki_rgid = cred->cr_rgid;
 		kp->ki_svgid = cred->cr_svgid;
 		/* If jailed(cred), emulate the old P_JAILED flag. */
 		if (jailed(cred)) {
 			kp->ki_flag |= P_JAILED;
 			/* If inside the jail, use 0 as a jail ID. */
 			if (cred->cr_prison != curthread->td_ucred->cr_prison)
 				kp->ki_jid = cred->cr_prison->pr_id;
 		}
 		strlcpy(kp->ki_loginclass, cred->cr_loginclass->lc_name,
 		    sizeof(kp->ki_loginclass));
 	}
 	ps = p->p_sigacts;
 	if (ps) {
 		mtx_lock(&ps->ps_mtx);
 		kp->ki_sigignore = ps->ps_sigignore;
 		kp->ki_sigcatch = ps->ps_sigcatch;
 		mtx_unlock(&ps->ps_mtx);
 	}
 	if (p->p_state != PRS_NEW &&
 	    p->p_state != PRS_ZOMBIE &&
 	    p->p_vmspace != NULL) {
 		struct vmspace *vm = p->p_vmspace;
 
 		kp->ki_size = vm->vm_map.size;
 		kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/
 		FOREACH_THREAD_IN_PROC(p, td0) {
 			if (!TD_IS_SWAPPED(td0))
 				kp->ki_rssize += td0->td_kstack_pages;
 		}
 		kp->ki_swrss = vm->vm_swrss;
 		kp->ki_tsize = vm->vm_tsize;
 		kp->ki_dsize = vm->vm_dsize;
 		kp->ki_ssize = vm->vm_ssize;
 	} else if (p->p_state == PRS_ZOMBIE)
 		kp->ki_stat = SZOMB;
 	if (kp->ki_flag & P_INMEM)
 		kp->ki_sflag = PS_INMEM;
 	else
 		kp->ki_sflag = 0;
 	/* Calculate legacy swtime as seconds since 'swtick'. */
 	kp->ki_swtime = (ticks - p->p_swtick) / hz;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
 	kp->ki_fibnum = p->p_fibnum;
 	kp->ki_start = p->p_stats->p_start;
 	timevaladd(&kp->ki_start, &boottime);
 	PROC_STATLOCK(p);
 	rufetch(p, &kp->ki_rusage);
 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
 	PROC_STATUNLOCK(p);
 	calccru(p, &kp->ki_childutime, &kp->ki_childstime);
 	/* Some callers want child times in a single value. */
 	kp->ki_childtime = kp->ki_childstime;
 	timevaladd(&kp->ki_childtime, &kp->ki_childutime);
 
 	FOREACH_THREAD_IN_PROC(p, td0)
 		kp->ki_cow += td0->td_cow;
 
 	tp = NULL;
 	if (p->p_pgrp) {
 		kp->ki_pgid = p->p_pgrp->pg_id;
 		kp->ki_jobc = p->p_pgrp->pg_jobc;
 		sp = p->p_pgrp->pg_session;
 
 		if (sp != NULL) {
 			kp->ki_sid = sp->s_sid;
 			SESS_LOCK(sp);
 			strlcpy(kp->ki_login, sp->s_login,
 			    sizeof(kp->ki_login));
 			if (sp->s_ttyvp)
 				kp->ki_kiflag |= KI_CTTY;
 			if (SESS_LEADER(p))
 				kp->ki_kiflag |= KI_SLEADER;
 			/* XXX proctree_lock */
 			tp = sp->s_ttyp;
 			SESS_UNLOCK(sp);
 		}
 	}
 	if ((p->p_flag & P_CONTROLT) && tp != NULL) {
 		kp->ki_tdev = tty_udev(tp);
 		kp->ki_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PID;
 		if (tp->t_session)
 			kp->ki_tsid = tp->t_session->s_sid;
 	} else
 		kp->ki_tdev = NODEV;
 	if (p->p_comm[0] != '\0')
 		strlcpy(kp->ki_comm, p->p_comm, sizeof(kp->ki_comm));
 	if (p->p_sysent && p->p_sysent->sv_name != NULL &&
 	    p->p_sysent->sv_name[0] != '\0')
 		strlcpy(kp->ki_emul, p->p_sysent->sv_name, sizeof(kp->ki_emul));
 	kp->ki_siglist = p->p_siglist;
 	kp->ki_xstat = KW_EXITCODE(p->p_xexit, p->p_xsig);
 	kp->ki_acflag = p->p_acflag;
 	kp->ki_lock = p->p_lock;
 	if (p->p_pptr) {
 		kp->ki_ppid = proc_realparent(p)->p_pid;
 		if (p->p_flag & P_TRACED)
 			kp->ki_tracer = p->p_pptr->p_pid;
 	}
 }
 
 /*
  * Fill in information that is thread specific.  Must be called with
  * target process locked.  If 'preferthread' is set, overwrite certain
  * process-related fields that are maintained for both threads and
  * processes.
  */
 static void
 fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp, int preferthread)
 {
 	struct proc *p;
 
 	p = td->td_proc;
 	kp->ki_tdaddr = td;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (preferthread)
 		PROC_STATLOCK(p);
 	thread_lock(td);
 	if (td->td_wmesg != NULL)
 		strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg));
 	else
 		bzero(kp->ki_wmesg, sizeof(kp->ki_wmesg));
 	strlcpy(kp->ki_tdname, td->td_name, sizeof(kp->ki_tdname));
 	if (TD_ON_LOCK(td)) {
 		kp->ki_kiflag |= KI_LOCKBLOCK;
 		strlcpy(kp->ki_lockname, td->td_lockname,
 		    sizeof(kp->ki_lockname));
 	} else {
 		kp->ki_kiflag &= ~KI_LOCKBLOCK;
 		bzero(kp->ki_lockname, sizeof(kp->ki_lockname));
 	}
 
 	if (p->p_state == PRS_NORMAL) { /* approximate. */
 		if (TD_ON_RUNQ(td) ||
 		    TD_CAN_RUN(td) ||
 		    TD_IS_RUNNING(td)) {
 			kp->ki_stat = SRUN;
 		} else if (P_SHOULDSTOP(p)) {
 			kp->ki_stat = SSTOP;
 		} else if (TD_IS_SLEEPING(td)) {
 			kp->ki_stat = SSLEEP;
 		} else if (TD_ON_LOCK(td)) {
 			kp->ki_stat = SLOCK;
 		} else {
 			kp->ki_stat = SWAIT;
 		}
 	} else if (p->p_state == PRS_ZOMBIE) {
 		kp->ki_stat = SZOMB;
 	} else {
 		kp->ki_stat = SIDL;
 	}
 
 	/* Things in the thread */
 	kp->ki_wchan = td->td_wchan;
 	kp->ki_pri.pri_level = td->td_priority;
 	kp->ki_pri.pri_native = td->td_base_pri;
 
 	/*
 	 * Note: legacy fields; clamp at the old NOCPU value and/or
 	 * the maximum u_char CPU value.
 	 */
 	if (td->td_lastcpu == NOCPU)
 		kp->ki_lastcpu_old = NOCPU_OLD;
 	else if (td->td_lastcpu > MAXCPU_OLD)
 		kp->ki_lastcpu_old = MAXCPU_OLD;
 	else
 		kp->ki_lastcpu_old = td->td_lastcpu;
 
 	if (td->td_oncpu == NOCPU)
 		kp->ki_oncpu_old = NOCPU_OLD;
 	else if (td->td_oncpu > MAXCPU_OLD)
 		kp->ki_oncpu_old = MAXCPU_OLD;
 	else
 		kp->ki_oncpu_old = td->td_oncpu;
 
 	kp->ki_lastcpu = td->td_lastcpu;
 	kp->ki_oncpu = td->td_oncpu;
 	kp->ki_tdflags = td->td_flags;
 	kp->ki_tid = td->td_tid;
 	kp->ki_numthreads = p->p_numthreads;
 	kp->ki_pcb = td->td_pcb;
 	kp->ki_kstack = (void *)td->td_kstack;
 	kp->ki_slptime = (ticks - td->td_slptick) / hz;
 	kp->ki_pri.pri_class = td->td_pri_class;
 	kp->ki_pri.pri_user = td->td_user_pri;
 
 	if (preferthread) {
 		rufetchtd(td, &kp->ki_rusage);
 		kp->ki_runtime = cputick2usec(td->td_rux.rux_runtime);
 		kp->ki_pctcpu = sched_pctcpu(td);
 		kp->ki_estcpu = td->td_estcpu;
 		kp->ki_cow = td->td_cow;
 	}
 
 	/* We can't get this anymore but ps etc never used it anyway. */
 	kp->ki_rqindex = 0;
 
 	if (preferthread)
 		kp->ki_siglist = td->td_siglist;
 	kp->ki_sigmask = td->td_sigmask;
 	thread_unlock(td);
 	if (preferthread)
 		PROC_STATUNLOCK(p);
 }
 
 /*
  * Fill in a kinfo_proc structure for the specified process.
  * Must be called with the target process locked.
  */
 void
 fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp)
 {
 
 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
 
 	fill_kinfo_proc_only(p, kp);
 	fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp, 0);
 	fill_kinfo_aggregate(p, kp);
 }
 
 struct pstats *
 pstats_alloc(void)
 {
 
 	return (malloc(sizeof(struct pstats), M_SUBPROC, M_ZERO|M_WAITOK));
 }
 
 /*
  * Copy parts of p_stats; zero the rest of p_stats (statistics).
  */
 void
 pstats_fork(struct pstats *src, struct pstats *dst)
 {
 
 	bzero(&dst->pstat_startzero,
 	    __rangeof(struct pstats, pstat_startzero, pstat_endzero));
 	bcopy(&src->pstat_startcopy, &dst->pstat_startcopy,
 	    __rangeof(struct pstats, pstat_startcopy, pstat_endcopy));
 }
 
 void
 pstats_free(struct pstats *ps)
 {
 
 	free(ps, M_SUBPROC);
 }
 
 static struct proc *
 zpfind_locked(pid_t pid)
 {
 	struct proc *p;
 
 	sx_assert(&allproc_lock, SX_LOCKED);
 	LIST_FOREACH(p, &zombproc, p_list) {
 		if (p->p_pid == pid) {
 			PROC_LOCK(p);
 			break;
 		}
 	}
 	return (p);
 }
 
 /*
  * Locate a zombie process by number
  */
 struct proc *
 zpfind(pid_t pid)
 {
 	struct proc *p;
 
 	sx_slock(&allproc_lock);
 	p = zpfind_locked(pid);
 	sx_sunlock(&allproc_lock);
 	return (p);
 }
 
 #ifdef COMPAT_FREEBSD32
 
 /*
  * This function is typically used to copy out the kernel address, so
  * it can be replaced by assignment of zero.
  */
 static inline uint32_t
 ptr32_trim(void *ptr)
 {
 	uintptr_t uptr;
 
 	uptr = (uintptr_t)ptr;
 	return ((uptr > UINT_MAX) ? 0 : uptr);
 }
 
 #define PTRTRIM_CP(src,dst,fld) \
 	do { (dst).fld = ptr32_trim((src).fld); } while (0)
 
 /*
  * Translate a native struct kinfo_proc (*ki) into the 32-bit compat layout
  * struct kinfo_proc32 (*ki32).
  *
  * The destination is zeroed first, so any member that is not explicitly
  * copied below reads as zero.  Pointer members go through PTRTRIM_CP()
  * (values that do not fit in 32 bits become zero); the remaining scalar
  * members are copied with the CP()/TV_CP() helpers, which are defined
  * outside this block.  Fixed-size name buffers are copied whole, and the
  * two rusage members are converted by freebsd32_rusage_out().
  */
 static void
 freebsd32_kinfo_proc_out(const struct kinfo_proc *ki, struct kinfo_proc32 *ki32)
 {
 	int i;
 
 	bzero(ki32, sizeof(struct kinfo_proc32));
 	/* The record describes itself with the size of the 32-bit layout. */
 	ki32->ki_structsize = sizeof(struct kinfo_proc32);
 	CP(*ki, *ki32, ki_layout);
 	PTRTRIM_CP(*ki, *ki32, ki_args);
 	PTRTRIM_CP(*ki, *ki32, ki_paddr);
 	PTRTRIM_CP(*ki, *ki32, ki_addr);
 	PTRTRIM_CP(*ki, *ki32, ki_tracep);
 	PTRTRIM_CP(*ki, *ki32, ki_textvp);
 	PTRTRIM_CP(*ki, *ki32, ki_fd);
 	PTRTRIM_CP(*ki, *ki32, ki_vmspace);
 	PTRTRIM_CP(*ki, *ki32, ki_wchan);
 	CP(*ki, *ki32, ki_pid);
 	CP(*ki, *ki32, ki_ppid);
 	CP(*ki, *ki32, ki_pgid);
 	CP(*ki, *ki32, ki_tpgid);
 	CP(*ki, *ki32, ki_sid);
 	CP(*ki, *ki32, ki_tsid);
 	CP(*ki, *ki32, ki_jobc);
 	CP(*ki, *ki32, ki_tdev);
 	CP(*ki, *ki32, ki_siglist);
 	CP(*ki, *ki32, ki_sigmask);
 	CP(*ki, *ki32, ki_sigignore);
 	CP(*ki, *ki32, ki_sigcatch);
 	CP(*ki, *ki32, ki_uid);
 	CP(*ki, *ki32, ki_ruid);
 	CP(*ki, *ki32, ki_svuid);
 	CP(*ki, *ki32, ki_rgid);
 	CP(*ki, *ki32, ki_svgid);
 	CP(*ki, *ki32, ki_ngroups);
 	/* All KI_NGROUPS slots are copied, not just the first ki_ngroups. */
 	for (i = 0; i < KI_NGROUPS; i++)
 		CP(*ki, *ki32, ki_groups[i]);
 	CP(*ki, *ki32, ki_size);
 	CP(*ki, *ki32, ki_rssize);
 	CP(*ki, *ki32, ki_swrss);
 	CP(*ki, *ki32, ki_tsize);
 	CP(*ki, *ki32, ki_dsize);
 	CP(*ki, *ki32, ki_ssize);
 	CP(*ki, *ki32, ki_xstat);
 	CP(*ki, *ki32, ki_acflag);
 	CP(*ki, *ki32, ki_pctcpu);
 	CP(*ki, *ki32, ki_estcpu);
 	CP(*ki, *ki32, ki_slptime);
 	CP(*ki, *ki32, ki_swtime);
 	CP(*ki, *ki32, ki_cow);
 	CP(*ki, *ki32, ki_runtime);
 	TV_CP(*ki, *ki32, ki_start);
 	TV_CP(*ki, *ki32, ki_childtime);
 	CP(*ki, *ki32, ki_flag);
 	CP(*ki, *ki32, ki_kiflag);
 	CP(*ki, *ki32, ki_traceflag);
 	CP(*ki, *ki32, ki_stat);
 	CP(*ki, *ki32, ki_nice);
 	CP(*ki, *ki32, ki_lock);
 	CP(*ki, *ki32, ki_rqindex);
 	CP(*ki, *ki32, ki_oncpu);
 	CP(*ki, *ki32, ki_lastcpu);
 
 	/* XXX TODO: wrap cpu value as appropriate */
 	CP(*ki, *ki32, ki_oncpu_old);
 	CP(*ki, *ki32, ki_lastcpu_old);
 
 	/* Name buffers: each copy covers the full buffer (LEN + 1 bytes). */
 	bcopy(ki->ki_tdname, ki32->ki_tdname, TDNAMLEN + 1);
 	bcopy(ki->ki_wmesg, ki32->ki_wmesg, WMESGLEN + 1);
 	bcopy(ki->ki_login, ki32->ki_login, LOGNAMELEN + 1);
 	bcopy(ki->ki_lockname, ki32->ki_lockname, LOCKNAMELEN + 1);
 	bcopy(ki->ki_comm, ki32->ki_comm, COMMLEN + 1);
 	bcopy(ki->ki_emul, ki32->ki_emul, KI_EMULNAMELEN + 1);
 	bcopy(ki->ki_loginclass, ki32->ki_loginclass, LOGINCLASSLEN + 1);
 	CP(*ki, *ki32, ki_tracer);
 	CP(*ki, *ki32, ki_flag2);
 	CP(*ki, *ki32, ki_fibnum);
 	CP(*ki, *ki32, ki_cr_flags);
 	CP(*ki, *ki32, ki_jid);
 	CP(*ki, *ki32, ki_numthreads);
 	CP(*ki, *ki32, ki_tid);
 	CP(*ki, *ki32, ki_pri);
 	freebsd32_rusage_out(&ki->ki_rusage, &ki32->ki_rusage);
 	freebsd32_rusage_out(&ki->ki_rusage_ch, &ki32->ki_rusage_ch);
 	PTRTRIM_CP(*ki, *ki32, ki_pcb);
 	PTRTRIM_CP(*ki, *ki32, ki_kstack);
 	PTRTRIM_CP(*ki, *ki32, ki_udata);
 	CP(*ki, *ki32, ki_sflag);
 	CP(*ki, *ki32, ki_tdflags);
 }
 #endif
 
 /*
  * Append kinfo_proc record(s) describing process `p' to the sbuf `sb'.
  *
  * If KERN_PROC_NOTHREADS is set in `flags', a single per-process record is
  * emitted; otherwise one record is emitted for every thread of the
  * process.  When built with COMPAT_FREEBSD32 and KERN_PROC_MASK32 is set,
  * each record is converted to the 32-bit layout before being appended.
  *
  * The process must be locked on entry (asserted) and is unlocked before
  * returning.  Returns 0 on success or ENOMEM if an sbuf append fails.
  */
 int
 kern_proc_out(struct proc *p, struct sbuf *sb, int flags)
 {
 	struct thread *td;
 	struct kinfo_proc ki;
 #ifdef COMPAT_FREEBSD32
 	struct kinfo_proc32 ki32;
 #endif
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	MPASS(FIRST_THREAD_IN_PROC(p) != NULL);
 
 	error = 0;
 	/* Fill the process-wide part once; threads are layered on top below. */
 	fill_kinfo_proc(p, &ki);
 	if ((flags & KERN_PROC_NOTHREADS) != 0) {
 #ifdef COMPAT_FREEBSD32
 		if ((flags & KERN_PROC_MASK32) != 0) {
 			freebsd32_kinfo_proc_out(&ki, &ki32);
 			if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
 				error = ENOMEM;
 		} else
 #endif
 			if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
 				error = ENOMEM;
 	} else {
 		FOREACH_THREAD_IN_PROC(p, td) {
 			/* Overlay this thread's data onto the shared record. */
 			fill_kinfo_thread(td, &ki, 1);
 #ifdef COMPAT_FREEBSD32
 			if ((flags & KERN_PROC_MASK32) != 0) {
 				freebsd32_kinfo_proc_out(&ki, &ki32);
 				if (sbuf_bcat(sb, &ki32, sizeof(ki32)) != 0)
 					error = ENOMEM;
 			} else
 #endif
 				if (sbuf_bcat(sb, &ki, sizeof(ki)) != 0)
 					error = ENOMEM;
 			/* Stop at the first failed append. */
 			if (error != 0)
 				break;
 		}
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 static int
 sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags,
     int doingzomb)
 {
 	struct sbuf sb;
 	struct kinfo_proc ki;
 	struct proc *np;
 	int error, error2;
 	pid_t pid;
 
 	pid = p->p_pid;
 	sbuf_new_for_sysctl(&sb, (char *)&ki, sizeof(ki), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = kern_proc_out(p, &sb, flags);
 	error2 = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	if (error != 0)
 		return (error);
 	else if (error2 != 0)
 		return (error2);
 	if (doingzomb)
 		np = zpfind(pid);
 	else {
 		if (pid == 0)
 			return (0);
 		np = pfind(pid);
 	}
 	if (np == NULL)
 		return (ESRCH);
 	if (np != p) {
 		PROC_UNLOCK(np);
 		return (ESRCH);
 	}
 	PROC_UNLOCK(np);
 	return (0);
 }
 
 /*
  * Sysctl handler for the kern.proc.* process-listing nodes.
  *
  * arg1/arg2 carry the trailing MIB components (the selector argument, if
  * any) and their count.  The node's own oid number selects the filter
  * (KERN_PROC_PID, _PGRP, _UID, ...); KERN_PROC_INC_THREAD in the oid
  * number, or KERN_PROC_ALL, requests per-thread records.
  *
  * KERN_PROC_PID is served by a single pget() lookup.  Every other selector
  * walks allproc and then zombproc with proctree_lock and allproc_lock held
  * shared, emitting each matching, visible process via sysctl_out_proc().
  *
  * Returns 0 on success, EINVAL for a malformed name, or the first error
  * reported while emitting output.
  */
 static int
 sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	int flags, doingzomb, oid_number;
 	int error = 0;
 
 	oid_number = oidp->oid_number;
 	if (oid_number != KERN_PROC_ALL &&
 	    (oid_number & KERN_PROC_INC_THREAD) == 0)
 		flags = KERN_PROC_NOTHREADS;
 	else {
 		flags = 0;
 		/* Strip the thread bit so the selector compares cleanly below. */
 		oid_number &= ~KERN_PROC_INC_THREAD;
 	}
 #ifdef COMPAT_FREEBSD32
 	if (req->flags & SCTL_MASK32)
 		flags |= KERN_PROC_MASK32;
 #endif
 	if (oid_number == KERN_PROC_PID) {
 		if (namelen != 1)
 			return (EINVAL);
 		error = sysctl_wire_old_buffer(req, 0);
 		if (error)
 			return (error);
 		sx_slock(&proctree_lock);
 		error = pget((pid_t)name[0], PGET_CANSEE, &p);
 		if (error == 0)
 			error = sysctl_out_proc(p, req, flags, 0);
 		sx_sunlock(&proctree_lock);
 		return (error);
 	}
 
 	/* Validate the number of selector arguments for the list requests. */
 	switch (oid_number) {
 	case KERN_PROC_ALL:
 		if (namelen != 0)
 			return (EINVAL);
 		break;
 	case KERN_PROC_PROC:
 		if (namelen != 0 && namelen != 1)
 			return (EINVAL);
 		break;
 	default:
 		if (namelen != 1)
 			return (EINVAL);
 		break;
 	}
 
 	if (!req->oldptr) {
 		/* overestimate by 5 procs */
 		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
 		if (error)
 			return (error);
 	}
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sx_slock(&proctree_lock);
 	sx_slock(&allproc_lock);
 	/* Pass 0 walks allproc, pass 1 walks zombproc. */
 	for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
 		if (!doingzomb)
 			p = LIST_FIRST(&allproc);
 		else
 			p = LIST_FIRST(&zombproc);
 		for (; p != 0; p = LIST_NEXT(p, p_list)) {
 			/*
 			 * Skip embryonic processes.
 			 */
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NEW) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			KASSERT(p->p_ucred != NULL,
 			    ("process credential is NULL for non-NEW proc"));
 			/*
 			 * Show a user only appropriate processes.
 			 */
 			if (p_cansee(curthread, p)) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			/*
 			 * TODO - make more efficient (see notes below).
 			 * do by session.
 			 */
 			switch (oid_number) {
 
 			case KERN_PROC_GID:
 				if (p->p_ucred->cr_gid != (gid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_PGRP:
 				/* could do this by traversing pgrp */
 				if (p->p_pgrp == NULL ||
 				    p->p_pgrp->pg_id != (pid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_RGID:
 				if (p->p_ucred->cr_rgid != (gid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_SESSION:
 				if (p->p_session == NULL ||
 				    p->p_session->s_sid != (pid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_TTY:
 				if ((p->p_flag & P_CONTROLT) == 0 ||
 				    p->p_session == NULL) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				/* XXX proctree_lock */
 				SESS_LOCK(p->p_session);
 				if (p->p_session->s_ttyp == NULL ||
 				    tty_udev(p->p_session->s_ttyp) !=
 				    (dev_t)name[0]) {
 					SESS_UNLOCK(p->p_session);
 					PROC_UNLOCK(p);
 					continue;
 				}
 				SESS_UNLOCK(p->p_session);
 				break;
 
 			case KERN_PROC_UID:
 				if (p->p_ucred->cr_uid != (uid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_RUID:
 				if (p->p_ucred->cr_ruid != (uid_t)name[0]) {
 					PROC_UNLOCK(p);
 					continue;
 				}
 				break;
 
 			case KERN_PROC_PROC:
 				break;
 
 			default:
 				break;
 
 			}
 
 			/*
 			 * p is still locked here; the lock is released inside
 			 * sysctl_out_proc() (by kern_proc_out()).
 			 */
 			error = sysctl_out_proc(p, req, flags, doingzomb);
 			if (error) {
 				sx_sunlock(&allproc_lock);
 				sx_sunlock(&proctree_lock);
 				return (error);
 			}
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	sx_sunlock(&proctree_lock);
 	return (0);
 }
 
 /*
  * Allocate a process-arguments container with room for `len' bytes of
  * argument data following the header.  The allocation sleeps until memory
  * is available (M_WAITOK).  The returned object has ar_length set to `len'
  * and starts with a single reference.
  */
 struct pargs *
 pargs_alloc(int len)
 {
 	struct pargs *newpa;
 
 	newpa = malloc(sizeof(*newpa) + len, M_PARGS, M_WAITOK);
 	newpa->ar_length = len;
 	refcount_init(&newpa->ar_ref, 1);
 	return (newpa);
 }
 
 /*
  * Release the storage of a pargs object unconditionally.  The reference
  * count is not consulted here; reference-counted release goes through
  * pargs_drop().
  */
 static void
 pargs_free(struct pargs *pa)
 {
 
 	free(pa, M_PARGS);
 }
 
 /*
  * Take an additional reference on `pa'.  A NULL pointer is accepted and
  * ignored.
  */
 void
 pargs_hold(struct pargs *pa)
 {
 
 	if (pa != NULL)
 		refcount_acquire(&pa->ar_ref);
 }
 
 /*
  * Give up one reference on `pa', freeing the object when the released
  * reference was the last one.  A NULL pointer is accepted and ignored.
  */
 void
 pargs_drop(struct pargs *pa)
 {
 
 	if (pa != NULL && refcount_release(&pa->ar_ref))
 		pargs_free(pa);
 }
 
 /*
  * Read up to `len' bytes of process `p' memory starting at `sptr' into
  * `buf', on behalf of thread `td'.
  *
  * Returns 0 if at least one byte was read and ENOMEM if nothing could be
  * read.  On success the whole of `buf' is defined: any bytes past the end
  * of a short read are zero-filled, so callers may safely scan the entire
  * chunk for a terminating NUL.
  */
 static int
 proc_read_string(struct thread *td, struct proc *p, const char *sptr, char *buf,
     size_t len)
 {
 	ssize_t n;
 
 	/*
 	 * This may return a short read if the string is shorter than the chunk
 	 * and is aligned at the end of the page, and the following page is not
 	 * mapped.
 	 */
 	n = proc_readmem(td, p, (vm_offset_t)sptr, buf, len);
 	if (n <= 0)
 		return (ENOMEM);
 	/*
 	 * Do not leave stale buffer contents after a short read.  The target
 	 * process controls the string pointers, so without this an
 	 * unterminated string ending at a page boundary would make the
 	 * caller's strnlen() run into whatever the buffer held before.
 	 */
 	if ((size_t)n < len)
 		bzero(buf + n, len - (size_t)n);
 	return (0);
 }
 
 #define PROC_AUXV_MAX	256	/* Safety limit on auxv size. */
 
 /*
  * Selects which per-process vector the get_proc_vector*() helpers fetch.
  */
 enum proc_vector_type {
 	PROC_ARG,	/* Argument string pointers (ps_argvstr). */
 	PROC_ENV,	/* Environment string pointers (ps_envstr). */
 	PROC_AUX,	/* ELF auxiliary vector following the env array. */
 };
 
 #ifdef COMPAT_FREEBSD32
 /*
  * 32-bit (ILP32) variant of get_proc_vector(): fetch the argv, envv or
  * auxv vector of process `p' by reading its freebsd32_ps_strings and then
  * the vector itself out of the process address space.
  *
  * On success *proc_vectorp points to an M_TEMP allocation owned by the
  * caller and *vsizep holds the number of entries.  For PROC_ARG/PROC_ENV
  * the 32-bit pointers are widened into a native char * array; for
  * PROC_AUX the raw Elf32_Auxinfo array is returned as-is (cast to
  * char **).
  *
  * Returns ENOMEM when process memory cannot be read in full, ENOEXEC when
  * the data looks implausible (count above ARG_MAX, misaligned auxv, or no
  * AT_NULL within PROC_AUXV_MAX entries), and EINVAL for an unknown type.
  */
 static int
 get_proc_vector32(struct thread *td, struct proc *p, char ***proc_vectorp,
     size_t *vsizep, enum proc_vector_type type)
 {
 	struct freebsd32_ps_strings pss;
 	Elf32_Auxinfo aux;
 	vm_offset_t vptr, ptr;
 	uint32_t *proc_vector32;
 	char **proc_vector;
 	size_t vsize, size;
 	int i, error;
 
 	error = 0;
 	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
 	    sizeof(pss)) != sizeof(pss))
 		return (ENOMEM);
 	switch (type) {
 	case PROC_ARG:
 		vptr = (vm_offset_t)PTRIN(pss.ps_argvstr);
 		vsize = pss.ps_nargvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(int32_t);
 		break;
 	case PROC_ENV:
 		vptr = (vm_offset_t)PTRIN(pss.ps_envstr);
 		vsize = pss.ps_nenvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(int32_t);
 		break;
 	case PROC_AUX:
 		/* The auxv starts after the env array and its terminator. */
 		vptr = (vm_offset_t)PTRIN(pss.ps_envstr) +
 		    (pss.ps_nenvstr + 1) * sizeof(int32_t);
 		if (vptr % 4 != 0)
 			return (ENOEXEC);
 		/* First pass: count entries up to and including AT_NULL. */
 		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
 			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
 			    sizeof(aux))
 				return (ENOMEM);
 			if (aux.a_type == AT_NULL)
 				break;
 			ptr += sizeof(aux);
 		}
 		if (aux.a_type != AT_NULL)
 			return (ENOEXEC);
 		vsize = i + 1;
 		size = vsize * sizeof(aux);
 		break;
 	default:
 		KASSERT(0, ("Wrong proc vector type: %d", type));
 		return (EINVAL);
 	}
 	proc_vector32 = malloc(size, M_TEMP, M_WAITOK);
 	if (proc_readmem(td, p, vptr, proc_vector32, size) != size) {
 		error = ENOMEM;
 		goto done;
 	}
 	if (type == PROC_AUX) {
 		/* Ownership of the raw buffer passes to the caller. */
 		*proc_vectorp = (char **)proc_vector32;
 		*vsizep = vsize;
 		return (0);
 	}
 	/* Widen each 32-bit user pointer into a native pointer. */
 	proc_vector = malloc(vsize * sizeof(char *), M_TEMP, M_WAITOK);
 	for (i = 0; i < (int)vsize; i++)
 		proc_vector[i] = PTRIN(proc_vector32[i]);
 	*proc_vectorp = proc_vector;
 	*vsizep = vsize;
 done:
 	/* Reached on both the error path and the ARG/ENV success path. */
 	free(proc_vector32, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * Fetch the argv, envv or auxv vector of process `p' by reading its
  * ps_strings structure and then the vector itself out of the process
  * address space.  With COMPAT_FREEBSD32, ILP32 processes are handed off
  * to get_proc_vector32().
  *
  * On success *proc_vectorp points to an M_TEMP allocation owned by the
  * caller and *vsizep holds the number of entries (char * entries for
  * PROC_ARG/PROC_ENV, Elf_Auxinfo entries including the AT_NULL terminator
  * for PROC_AUX).
  *
  * Returns ENOMEM when process memory cannot be read in full, ENOEXEC when
  * the data looks implausible, and EINVAL for an unknown type.
  */
 static int
 get_proc_vector(struct thread *td, struct proc *p, char ***proc_vectorp,
     size_t *vsizep, enum proc_vector_type type)
 {
 	struct ps_strings pss;
 	Elf_Auxinfo aux;
 	vm_offset_t vptr, ptr;
 	char **proc_vector;
 	size_t vsize, size;
 	int i;
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 		return (get_proc_vector32(td, p, proc_vectorp, vsizep, type));
 #endif
 	if (proc_readmem(td, p, (vm_offset_t)p->p_sysent->sv_psstrings, &pss,
 	    sizeof(pss)) != sizeof(pss))
 		return (ENOMEM);
 	switch (type) {
 	case PROC_ARG:
 		vptr = (vm_offset_t)pss.ps_argvstr;
 		vsize = pss.ps_nargvstr;
 		/* The count comes from process memory; bound it. */
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(char *);
 		break;
 	case PROC_ENV:
 		vptr = (vm_offset_t)pss.ps_envstr;
 		vsize = pss.ps_nenvstr;
 		if (vsize > ARG_MAX)
 			return (ENOEXEC);
 		size = vsize * sizeof(char *);
 		break;
 	case PROC_AUX:
 		/*
 		 * The aux array is just above env array on the stack. Check
 		 * that the address is naturally aligned.
 		 */
 		vptr = (vm_offset_t)pss.ps_envstr + (pss.ps_nenvstr + 1)
 		    * sizeof(char *);
 #if __ELF_WORD_SIZE == 64
 		if (vptr % sizeof(uint64_t) != 0)
 #else
 		if (vptr % sizeof(uint32_t) != 0)
 #endif
 			return (ENOEXEC);
 		/*
 		 * We count the array size reading the aux vectors from the
 		 * stack until AT_NULL vector is returned.  So (to keep the code
 		 * simple) we read the process stack twice: the first time here
 		 * to find the size and the second time when copying the vectors
 		 * to the allocated proc_vector.
 		 */
 		for (ptr = vptr, i = 0; i < PROC_AUXV_MAX; i++) {
 			if (proc_readmem(td, p, ptr, &aux, sizeof(aux)) !=
 			    sizeof(aux))
 				return (ENOMEM);
 			if (aux.a_type == AT_NULL)
 				break;
 			ptr += sizeof(aux);
 		}
 		/*
 		 * If the PROC_AUXV_MAX entries are iterated over, and we have
 		 * not reached AT_NULL, it is most likely we are reading wrong
 		 * data: either the process doesn't have auxv array or data has
 		 * been modified. Return the error in this case.
 		 */
 		if (aux.a_type != AT_NULL)
 			return (ENOEXEC);
 		vsize = i + 1;
 		size = vsize * sizeof(aux);
 		break;
 	default:
 		KASSERT(0, ("Wrong proc vector type: %d", type));
 		return (EINVAL); /* In case we are built without INVARIANTS. */
 	}
 	proc_vector = malloc(size, M_TEMP, M_WAITOK);
 	if (proc_readmem(td, p, vptr, proc_vector, size) != size) {
 		free(proc_vector, M_TEMP);
 		return (ENOMEM);
 	}
 	/* Success: the caller now owns proc_vector. */
 	*proc_vectorp = proc_vector;
 	*vsizep = vsize;
 
 	return (0);
 }
 
 #define GET_PS_STRINGS_CHUNK_SZ	256	/* Chunk size (bytes) for ps_strings operations. */
 
 /*
  * Append the argument or environment strings of process `p' to `sb' as a
  * sequence of NUL-terminated strings.
  *
  * The pointer vector is obtained with get_proc_vector(); each string is
  * then read from the process in GET_PS_STRINGS_CHUNK_SZ-byte chunks until
  * a chunk containing a NUL is seen.  Output is capped at
  * 2 * (PATH_MAX + ARG_MAX) bytes, and processing stops early at the first
  * NULL pointer in the vector.
  *
  * The process must be held (asserted).  Returns 0 on success or the error
  * from fetching the vector or reading a string.
  */
 static int
 get_ps_strings(struct thread *td, struct proc *p, struct sbuf *sb,
     enum proc_vector_type type)
 {
 	size_t done, len, nchr, vsize;
 	int error, i;
 	char **proc_vector, *sptr;
 	char pss_string[GET_PS_STRINGS_CHUNK_SZ];
 
 	PROC_ASSERT_HELD(p);
 
 	/*
 	 * We are not going to read more than 2 * (PATH_MAX + ARG_MAX) bytes.
 	 */
 	nchr = 2 * (PATH_MAX + ARG_MAX);
 
 	error = get_proc_vector(td, p, &proc_vector, &vsize, type);
 	if (error != 0)
 		return (error);
 	/* `done' counts the bytes emitted so far, terminators included. */
 	for (done = 0, i = 0; i < (int)vsize && done < nchr; i++) {
 		/*
 		 * The program may have scribbled into its argv array, e.g. to
 		 * remove some arguments.  If that has happened, break out
 		 * before trying to read from NULL.
 		 */
 		if (proc_vector[i] == NULL)
 			break;
 		for (sptr = proc_vector[i]; ; sptr += GET_PS_STRINGS_CHUNK_SZ) {
 			error = proc_read_string(td, p, sptr, pss_string,
 			    sizeof(pss_string));
 			if (error != 0)
 				goto done;
 			len = strnlen(pss_string, GET_PS_STRINGS_CHUNK_SZ);
 			/* Clamp so the string plus its NUL fits under nchr. */
 			if (done + len >= nchr)
 				len = nchr - done - 1;
 			sbuf_bcat(sb, pss_string, len);
 			/* A short chunk means the string (or the budget) ended. */
 			if (len != GET_PS_STRINGS_CHUNK_SZ)
 				break;
 			done += GET_PS_STRINGS_CHUNK_SZ;
 		}
 		/* Terminate the string just emitted. */
 		sbuf_bcat(sb, "", 1);
 		done += len + 1;
 	}
 done:
 	free(proc_vector, M_TEMP);
 	return (error);
 }
 
 /*
  * Append the argument strings of process `p' to `sb'; see
  * get_ps_strings() for the output format and error returns.
  *
  * NOTE(review): the `td' parameter is not used; curthread is passed to
  * get_ps_strings() instead, unlike proc_getauxv() which forwards `td'.
  * Confirm whether this is intentional.
  */
 int
 proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 
 	return (get_ps_strings(curthread, p, sb, PROC_ARG));
 }
 
 /*
  * Append the environment strings of process `p' to `sb'; see
  * get_ps_strings() for the output format and error returns.
  *
  * NOTE(review): the `td' parameter is not used; curthread is passed to
  * get_ps_strings() instead, unlike proc_getauxv() which forwards `td'.
  * Confirm whether this is intentional.
  */
 int
 proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 
 	return (get_ps_strings(curthread, p, sb, PROC_ENV));
 }
 
 int
 proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb)
 {
 	size_t vsize, size;
 	char **auxv;
 	int error;
 
 	error = get_proc_vector(td, p, &auxv, &vsize, PROC_AUX);
 	if (error == 0) {
 #ifdef COMPAT_FREEBSD32
 		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 			size = vsize * sizeof(Elf32_Auxinfo);
 		else
 #endif
 			size = vsize * sizeof(Elf_Auxinfo);
 		if (sbuf_bcat(sb, auxv, size) != 0)
 			error = ENOMEM;
 		free(auxv, M_TEMP);
 	}
 	return (error);
 }
 
 /*
  * This sysctl allows a process to retrieve the argument list or process
  * title for another process without groping around in the address space
  * of the other process.  It also allow a process to set its own "process 
  * title to a string of its own choice.
  */
 static int
 sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct pargs *newpa, *pa;
 	struct proc *p;
 	struct sbuf sb;
 	int flags, error = 0, error2;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	flags = PGET_CANSEE;
 	if (req->newptr != NULL)
 		flags |= PGET_ISCURRENT;
 	error = pget((pid_t)name[0], flags, &p);
 	if (error)
 		return (error);
 
 	pa = p->p_args;
 	if (pa != NULL) {
 		pargs_hold(pa);
 		PROC_UNLOCK(p);
 		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
 		pargs_drop(pa);
 	} else if ((p->p_flag & (P_WEXIT | P_SYSTEM)) == 0) {
 		_PHOLD(p);
 		PROC_UNLOCK(p);
 		sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 		sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 		error = proc_getargv(curthread, p, &sb);
 		error2 = sbuf_finish(&sb);
 		PRELE(p);
 		sbuf_delete(&sb);
 		if (error == 0 && error2 != 0)
 			error = error2;
 	} else {
 		PROC_UNLOCK(p);
 	}
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit)
 		return (ENOMEM);
 	newpa = pargs_alloc(req->newlen);
 	error = SYSCTL_IN(req, newpa->ar_args, req->newlen);
 	if (error != 0) {
 		pargs_free(newpa);
 		return (error);
 	}
 	PROC_LOCK(p);
 	pa = p->p_args;
 	p->p_args = newpa;
 	PROC_UNLOCK(p);
 	pargs_drop(pa);
 	return (0);
 }
 
 /*
  * This sysctl allows a process to retrieve environment of another process.
  */
 static int
 sysctl_kern_proc_env(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	struct sbuf sb;
 	int error, error2;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PRELE(p);
 		return (0);
 	}
 
 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = proc_getenvv(curthread, p, &sb);
 	error2 = sbuf_finish(&sb);
 	PRELE(p);
 	sbuf_delete(&sb);
 	return (error != 0 ? error : error2);
 }
 
 /*
  * This sysctl allows a process to retrieve ELF auxiliary vector of
  * another process.
  */
 static int
 sysctl_kern_proc_auxv(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1;
 	u_int namelen = arg2;
 	struct proc *p;
 	struct sbuf sb;
 	int error, error2;
 
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PRELE(p);
 		return (0);
 	}
 	sbuf_new_for_sysctl(&sb, NULL, GET_PS_STRINGS_CHUNK_SZ, req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = proc_getauxv(curthread, p, &sb);
 	error2 = sbuf_finish(&sb);
 	PRELE(p);
 	sbuf_delete(&sb);
 	return (error != 0 ? error : error2);
 }
 
 /*
  * This sysctl allows a process to retrieve the path of the executable for
  * itself or another process.
  *
  * The name must carry exactly one component, the pid (EINVAL otherwise);
  * a pid of -1 selects the calling process.  If the process has no text
  * vnode the handler succeeds without producing output.  Otherwise the
  * path resolved by vn_fullpath() is copied out including its terminating
  * NUL.
  */
 static int
 sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
 {
 	pid_t *pidp = (pid_t *)arg1;
 	unsigned int arglen = arg2;
 	struct proc *p;
 	struct vnode *vp;
 	char *retbuf, *freebuf;
 	int error;
 
 	if (arglen != 1)
 		return (EINVAL);
 	if (*pidp == -1) {	/* -1 means this process */
 		/*
 		 * NOTE(review): the current process is used without taking
 		 * its lock here, whereas the pget() path below returns with
 		 * the process locked; the unlocks further down are
 		 * conditional on *pidp for that reason.
 		 */
 		p = req->td->td_proc;
 	} else {
 		error = pget(*pidp, PGET_CANSEE, &p);
 		if (error != 0)
 			return (error);
 	}
 
 	vp = p->p_textvp;
 	if (vp == NULL) {
 		if (*pidp != -1)
 			PROC_UNLOCK(p);
 		return (0);
 	}
 	/* Reference the vnode so it stays valid once the lock is dropped. */
 	vref(vp);
 	if (*pidp != -1)
 		PROC_UNLOCK(p);
 	error = vn_fullpath(req->td, vp, &retbuf, &freebuf);
 	vrele(vp);
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
 	/* retbuf points into freebuf; only freebuf is freed. */
 	free(freebuf, M_TEMP);
 	return (error);
 }
 
 static int
 sysctl_kern_proc_sv_name(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	char *sv_name;
 	int *name;
 	int namelen;
 	int error;
 
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_CANSEE, &p);
 	if (error != 0)
 		return (error);
 	sv_name = p->p_sysent->sv_name;
 	PROC_UNLOCK(p);
 	return (sysctl_handle_string(oidp, sv_name, 0, req));
 }
 
 #ifdef KINFO_OVMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_ovmentry) == KINFO_OVMENTRY_SIZE);
 #endif
 
 #ifdef COMPAT_FREEBSD7
 static int
 sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
 {
 	vm_map_entry_t entry, tmp_entry;
 	unsigned int last_timestamp;
 	char *fullpath, *freepath;
 	struct kinfo_ovmentry *kve;
 	struct vattr va;
 	struct ucred *cred;
 	int error, *name;
 	struct vnode *vp;
 	struct proc *p;
 	vm_map_t map;
 	struct vmspace *vm;
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL) {
 		PRELE(p);
 		return (ESRCH);
 	}
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
 
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		vm_object_t obj, tobj, lobj;
 		vm_offset_t addr;
 
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
 		bzero(kve, sizeof(*kve));
 		kve->kve_structsize = sizeof(*kve);
 
 		kve->kve_private_resident = 0;
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
 			VM_OBJECT_RLOCK(obj);
 			if (obj->shadow_count == 1)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
 		}
 		kve->kve_resident = 0;
 		addr = entry->start;
 		while (addr < entry->end) {
 			if (pmap_extract(map->pmap, addr))
 				kve->kve_resident++;
 			addr += PAGE_SIZE;
 		}
 
 		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) {
 			if (tobj != obj)
 				VM_OBJECT_RLOCK(tobj);
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 			lobj = tobj;
 		}
 
 		kve->kve_start = (void*)entry->start;
 		kve->kve_end = (void*)entry->end;
 		kve->kve_offset = (off_t)entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
 		if (entry->protection & VM_PROT_WRITE)
 			kve->kve_protection |= KVME_PROT_WRITE;
 		if (entry->protection & VM_PROT_EXECUTE)
 			kve->kve_protection |= KVME_PROT_EXEC;
 
 		if (entry->eflags & MAP_ENTRY_COW)
 			kve->kve_flags |= KVME_FLAG_COW;
 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
 
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 
 		kve->kve_fileid = 0;
 		kve->kve_fsid = 0;
 		freepath = NULL;
 		fullpath = "";
 		if (lobj) {
 			vp = NULL;
 			switch (lobj->type) {
 			case OBJT_DEFAULT:
 				kve->kve_type = KVME_TYPE_DEFAULT;
 				break;
 			case OBJT_VNODE:
 				kve->kve_type = KVME_TYPE_VNODE;
 				vp = lobj->handle;
 				vref(vp);
 				break;
 			case OBJT_SWAP:
 				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
 					kve->kve_type = KVME_TYPE_VNODE;
 					if ((lobj->flags & OBJ_TMPFS) != 0) {
 						vp = lobj->un_pager.swp.swp_tmpfs;
 						vref(vp);
 					}
 				} else {
 					kve->kve_type = KVME_TYPE_SWAP;
 				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
 				break;
 			case OBJT_PHYS:
 				kve->kve_type = KVME_TYPE_PHYS;
 				break;
 			case OBJT_DEAD:
 				kve->kve_type = KVME_TYPE_DEAD;
 				break;
 			case OBJT_SG:
 				kve->kve_type = KVME_TYPE_SG;
 				break;
 			default:
 				kve->kve_type = KVME_TYPE_UNKNOWN;
 				break;
 			}
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(curthread, vp, &fullpath,
 				    &freepath);
 				cred = curthread->td_ucred;
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_fileid = va.va_fileid;
 					kve->kve_fsid = va.va_fsid;
 				}
 				vput(vp);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
 			kve->kve_ref_count = 0;
 			kve->kve_shadow_count = 0;
 		}
 
 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		error = SYSCTL_OUT(req, kve, sizeof(*kve));
 		vm_map_lock_read(map);
 		if (error)
 			break;
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 	PRELE(p);
 	free(kve, M_TEMP);
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD7 */
 
 #ifdef KINFO_VMENTRY_SIZE
 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
 #endif
 
 /*
  * Accumulate into kve->kve_resident the number of resident pages backing
  * map entry `entry', searching the entry's object and its chain of
  * backing objects, and set KVME_FLAG_SUPER in kve->kve_flags when a
  * superpage mapping is found.
  *
  * The visible caller (kern_proc_vmmap_out()) invokes this with the map
  * read-locked and every object in the backing chain read-locked.
  */
 static void
 kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
     struct kinfo_vmentry *kve)
 {
 	vm_object_t obj, tobj;
 	vm_page_t m, m_adv;
 	vm_offset_t addr;
 	vm_paddr_t locked_pa;
 	vm_pindex_t pi, pi_adv, pindex;
 
 	locked_pa = 0;
 	obj = entry->object.vm_object;
 	addr = entry->start;
 	m_adv = NULL;
 	pi = OFF_TO_IDX(entry->offset);
 	/* Each iteration advances by pi_adv pages, set in the loop body. */
 	for (; addr < entry->end; addr += IDX_TO_OFF(pi_adv), pi += pi_adv) {
 		if (m_adv != NULL) {
 			/* The previous search already located this page. */
 			m = m_adv;
 		} else {
 			/* Default skip: everything up to the end of the entry. */
 			pi_adv = OFF_TO_IDX(entry->end - addr);
 			pindex = pi;
 			for (tobj = obj;; tobj = tobj->backing_object) {
 				m = vm_page_find_least(tobj, pindex);
 				if (m != NULL) {
 					if (m->pindex == pindex)
 						break;
 					/*
 					 * Remember the nearest later page in
 					 * any object so the gap can be skipped.
 					 */
 					if (pi_adv > m->pindex - pindex) {
 						pi_adv = m->pindex - pindex;
 						m_adv = m;
 					}
 				}
 				if (tobj->backing_object == NULL)
 					goto next;
 				pindex += OFF_TO_IDX(tobj->
 				    backing_object_offset);
 			}
 		}
 		m_adv = NULL;
 		if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
 		    (addr & (pagesizes[1] - 1)) == 0 &&
 		    (pmap_mincore(map->pmap, addr, &locked_pa) &
 		    MINCORE_SUPER) != 0) {
 			kve->kve_flags |= KVME_FLAG_SUPER;
 			pi_adv = OFF_TO_IDX(pagesizes[1]);
 		} else {
 			/*
 			 * We do not test the found page on validity.
 			 * Either the page is busy and being paged in,
 			 * or it was invalidated.  The first case
 			 * should be counted as resident, the second
 			 * is not so clear; we do account both.
 			 */
 			pi_adv = 1;
 		}
 		kve->kve_resident += pi_adv;
 next:;
 	}
 	PA_UNLOCK_COND(locked_pa);
 }
 
 /*
  * Append one struct kinfo_vmentry record per VM map entry of process `p'
  * to `sb' (submap entries are skipped).
  *
  * Must be called with the process locked and will return unlocked.
  *
  * With KERN_VMMAP_PACK_KINFO set in `flags', each record is shortened to
  * end right after the NUL of kve_path; in all cases the record size is
  * rounded up to a multiple of sizeof(uint64_t).  When `maxlen' is not -1
  * it is a byte budget: output stops, without error, before the first
  * record that would exceed it.
  *
  * Returns 0 on success, ESRCH if the process has no vmspace to reference,
  * or ENOMEM if appending to the sbuf fails.
  */
 int
 kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
 {
 	vm_map_entry_t entry, tmp_entry;
 	struct vattr va;
 	vm_map_t map;
 	vm_object_t obj, tobj, lobj;
 	char *fullpath, *freepath;
 	struct kinfo_vmentry *kve;
 	struct ucred *cred;
 	struct vnode *vp;
 	struct vmspace *vm;
 	vm_offset_t addr;
 	unsigned int last_timestamp;
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* Trade the process lock for a hold for the rest of the function. */
 	_PHOLD(p);
 	PROC_UNLOCK(p);
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL) {
 		PRELE(p);
 		return (ESRCH);
 	}
 	kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
 
 	error = 0;
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	for (entry = map->header.next; entry != &map->header;
 	    entry = entry->next) {
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 
 		/* Saved for re-finding our position if the map changes. */
 		addr = entry->end;
 		bzero(kve, sizeof(*kve));
 		obj = entry->object.vm_object;
 		if (obj != NULL) {
 			/*
 			 * Read-lock the whole backing chain; lobj ends up as
 			 * the last object in it.
 			 */
 			for (tobj = obj; tobj != NULL;
 			    tobj = tobj->backing_object) {
 				VM_OBJECT_RLOCK(tobj);
 				lobj = tobj;
 			}
 			if (obj->backing_object == NULL)
 				kve->kve_private_resident =
 				    obj->resident_page_count;
 			if (!vmmap_skip_res_cnt)
 				kern_proc_vmmap_resident(map, entry, kve);
 			/*
 			 * Unlock the intermediate objects; obj and lobj stay
 			 * locked until their fields are read below.
 			 */
 			for (tobj = obj; tobj != NULL;
 			    tobj = tobj->backing_object) {
 				if (tobj != obj && tobj != lobj)
 					VM_OBJECT_RUNLOCK(tobj);
 			}
 		} else {
 			lobj = NULL;
 		}
 
 		kve->kve_start = entry->start;
 		kve->kve_end = entry->end;
 		kve->kve_offset = entry->offset;
 
 		if (entry->protection & VM_PROT_READ)
 			kve->kve_protection |= KVME_PROT_READ;
 		if (entry->protection & VM_PROT_WRITE)
 			kve->kve_protection |= KVME_PROT_WRITE;
 		if (entry->protection & VM_PROT_EXECUTE)
 			kve->kve_protection |= KVME_PROT_EXEC;
 
 		if (entry->eflags & MAP_ENTRY_COW)
 			kve->kve_flags |= KVME_FLAG_COW;
 		if (entry->eflags & MAP_ENTRY_NEEDS_COPY)
 			kve->kve_flags |= KVME_FLAG_NEEDS_COPY;
 		if (entry->eflags & MAP_ENTRY_NOCOREDUMP)
 			kve->kve_flags |= KVME_FLAG_NOCOREDUMP;
 		if (entry->eflags & MAP_ENTRY_GROWS_UP)
 			kve->kve_flags |= KVME_FLAG_GROWS_UP;
 		if (entry->eflags & MAP_ENTRY_GROWS_DOWN)
 			kve->kve_flags |= KVME_FLAG_GROWS_DOWN;
 
 		/*
 		 * The map lock is dropped for the vnode work and the sbuf
 		 * append; remember the timestamp to detect a changed map.
 		 */
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 
 		freepath = NULL;
 		fullpath = "";
 		if (lobj != NULL) {
 			vp = NULL;
 			switch (lobj->type) {
 			case OBJT_DEFAULT:
 				kve->kve_type = KVME_TYPE_DEFAULT;
 				break;
 			case OBJT_VNODE:
 				kve->kve_type = KVME_TYPE_VNODE;
 				vp = lobj->handle;
 				vref(vp);
 				break;
 			case OBJT_SWAP:
 				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
 					kve->kve_type = KVME_TYPE_VNODE;
 					if ((lobj->flags & OBJ_TMPFS) != 0) {
 						vp = lobj->un_pager.swp.swp_tmpfs;
 						vref(vp);
 					}
 				} else {
 					kve->kve_type = KVME_TYPE_SWAP;
 				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
 				break;
 			case OBJT_PHYS:
 				kve->kve_type = KVME_TYPE_PHYS;
 				break;
 			case OBJT_DEAD:
 				kve->kve_type = KVME_TYPE_DEAD;
 				break;
 			case OBJT_SG:
 				kve->kve_type = KVME_TYPE_SG;
 				break;
 			case OBJT_MGTDEVICE:
 				kve->kve_type = KVME_TYPE_MGTDEVICE;
 				break;
 			default:
 				kve->kve_type = KVME_TYPE_UNKNOWN;
 				break;
 			}
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 
 			kve->kve_ref_count = obj->ref_count;
 			kve->kve_shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(curthread, vp, &fullpath,
 				    &freepath);
 				kve->kve_vn_type = vntype_to_kinfo(vp->v_type);
 				cred = curthread->td_ucred;
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				if (VOP_GETATTR(vp, &va, cred) == 0) {
 					kve->kve_vn_fileid = va.va_fileid;
 					kve->kve_vn_fsid = va.va_fsid;
 					kve->kve_vn_mode =
 					    MAKEIMODE(va.va_type, va.va_mode);
 					kve->kve_vn_size = va.va_size;
 					kve->kve_vn_rdev = va.va_rdev;
 					kve->kve_status = KF_ATTR_VALID;
 				}
 				/* Unlocks and drops the reference taken above. */
 				vput(vp);
 			}
 		} else {
 			kve->kve_type = KVME_TYPE_NONE;
 			kve->kve_ref_count = 0;
 			kve->kve_shadow_count = 0;
 		}
 
 		strlcpy(kve->kve_path, fullpath, sizeof(kve->kve_path));
 		if (freepath != NULL)
 			free(freepath, M_TEMP);
 
 		/* Pack record size down */
 		if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
 			kve->kve_structsize =
 			    offsetof(struct kinfo_vmentry, kve_path) +
 			    strlen(kve->kve_path) + 1;
 		else
 			kve->kve_structsize = sizeof(*kve);
 		kve->kve_structsize = roundup(kve->kve_structsize,
 		    sizeof(uint64_t));
 
 		/* Halt filling and truncate rather than exceeding maxlen */
 		if (maxlen != -1 && maxlen < kve->kve_structsize) {
 			error = 0;
 			/* Retake the lock: the common exit path unlocks it. */
 			vm_map_lock_read(map);
 			break;
 		} else if (maxlen != -1)
 			maxlen -= kve->kve_structsize;
 
 		if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
 			error = ENOMEM;
 		vm_map_lock_read(map);
 		if (error != 0)
 			break;
 		/*
 		 * If the map changed while unlocked, re-find our position by
 		 * looking up the last address of the entry just reported.
 		 */
 		if (last_timestamp != map->timestamp) {
 			vm_map_lookup_entry(map, addr - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 	PRELE(p);
 	free(kve, M_TEMP);
 	return (error);
 }
 
 static int
 sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	struct sbuf sb;
 	int error, error2, *name;
 
 	name = (int *)arg1;
 	sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_vmentry), req);
 	sbuf_clear_flags(&sb, SBUF_INCLUDENUL);
 	error = pget((pid_t)name[0], PGET_CANDEBUG | PGET_NOTWEXIT, &p);
 	if (error != 0) {
 		sbuf_delete(&sb);
 		return (error);
 	}
 	error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
 	error2 = sbuf_finish(&sb);
 	sbuf_delete(&sb);
 	return (error != 0 ? error : error2);
 }
 
 #if defined(STACK) || defined(DDB)
 static int
 sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
 {
 	struct kinfo_kstack *kkstp;
 	int error, i, *name, numthreads;
 	lwpid_t *lwpidarray;
 	struct thread *td;
 	struct stack *st;
 	struct sbuf sb;
 	struct proc *p;
 
 	name = (int *)arg1;
 	error = pget((pid_t)name[0], PGET_NOTINEXEC | PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 
 	kkstp = malloc(sizeof(*kkstp), M_TEMP, M_WAITOK);
 	st = stack_create();
 
 	lwpidarray = NULL;
 	numthreads = 0;
 	PROC_LOCK(p);
 repeat:
 	if (numthreads < p->p_numthreads) {
 		if (lwpidarray != NULL) {
 			free(lwpidarray, M_TEMP);
 			lwpidarray = NULL;
 		}
 		numthreads = p->p_numthreads;
 		PROC_UNLOCK(p);
 		lwpidarray = malloc(sizeof(*lwpidarray) * numthreads, M_TEMP,
 		    M_WAITOK | M_ZERO);
 		PROC_LOCK(p);
 		goto repeat;
 	}
 	i = 0;
 
 	/*
 	 * XXXRW: During the below loop, execve(2) and countless other sorts
 	 * of changes could have taken place.  Should we check to see if the
 	 * vmspace has been replaced, or the like, in order to prevent
 	 * giving a snapshot that spans, say, execve(2), with some threads
 	 * before and some after?  Among other things, the credentials could
 	 * have changed, in which case the right to extract debug info might
 	 * no longer be assured.
 	 */
 	FOREACH_THREAD_IN_PROC(p, td) {
 		KASSERT(i < numthreads,
 		    ("sysctl_kern_proc_kstack: numthreads"));
 		lwpidarray[i] = td->td_tid;
 		i++;
 	}
 	numthreads = i;
 	for (i = 0; i < numthreads; i++) {
 		td = thread_find(p, lwpidarray[i]);
 		if (td == NULL) {
 			continue;
 		}
 		bzero(kkstp, sizeof(*kkstp));
 		(void)sbuf_new(&sb, kkstp->kkst_trace,
 		    sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
 		thread_lock(td);
 		kkstp->kkst_tid = td->td_tid;
 		if (TD_IS_SWAPPED(td)) {
 			kkstp->kkst_state = KKST_STATE_SWAPPED;
 		} else if (TD_IS_RUNNING(td)) {
 			if (stack_save_td_running(st, td) == 0)
 				kkstp->kkst_state = KKST_STATE_STACKOK;
 			else
 				kkstp->kkst_state = KKST_STATE_RUNNING;
 		} else {
 			kkstp->kkst_state = KKST_STATE_STACKOK;
 			stack_save_td(st, td);
 		}
 		thread_unlock(td);
 		PROC_UNLOCK(p);
 		stack_sbuf_print(&sb, st);
 		sbuf_finish(&sb);
 		sbuf_delete(&sb);
 		error = SYSCTL_OUT(req, kkstp, sizeof(*kkstp));
 		PROC_LOCK(p);
 		if (error)
 			break;
 	}
 	_PRELE(p);
 	PROC_UNLOCK(p);
 	if (lwpidarray != NULL)
 		free(lwpidarray, M_TEMP);
 	stack_destroy(st);
 	free(kkstp, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * Sysctl handler for kern.proc.groups: copy out the group list
  * (cr_groups, cr_ngroups entries of gid_t) from the credentials of the
  * requested process.  A pid of -1 selects the calling process; any other
  * pid is looked up with pget(PGET_CANSEE).  Exactly one name element is
  * accepted, otherwise EINVAL is returned.
  */
 static int
 sysctl_kern_proc_groups(SYSCTL_HANDLER_ARGS)
 {
 	struct ucred *cred;
 	struct proc *p;
 	unsigned int arglen;
 	pid_t *pidp;
 	int error;
 
 	pidp = (pid_t *)arg1;
 	arglen = arg2;
 	if (arglen != 1)
 		return (EINVAL);
 
 	if (*pidp != -1) {
 		error = pget(*pidp, PGET_CANSEE, &p);
 		if (error != 0)
 			return (error);
 	} else {
 		/* -1 means this process */
 		p = req->td->td_proc;
 		PROC_LOCK(p);
 	}
 
 	/*
 	 * Take our own reference on the credentials before unlocking the
 	 * process, so they can still be read during the copy-out.
 	 */
 	cred = crhold(p->p_ucred);
 	PROC_UNLOCK(p);
 
 	error = SYSCTL_OUT(req, cred->cr_groups,
 	    cred->cr_ngroups * sizeof(gid_t));
 	crfree(cred);
 	return (error);
 }
 
 /*
  * Sysctl handler for kern.proc.rlimit: read and/or set one resource limit
  * of another process.  name[0] is the pid and name[1] the limit index,
  * which must be below RLIM_NLIMITS.  A new value, when supplied, must be
  * exactly sizeof(struct rlimit) bytes.  Setting requires PGET_CANDEBUG
  * access to the target, reading only PGET_CANSEE.
  */
 static int
 sysctl_kern_proc_rlimit(SYSCTL_HANDLER_ARGS)
 {
 	struct rlimit rlim;
 	struct proc *p;
 	u_int namelen, which;
 	int error, flags, *name;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 2)
 		return (EINVAL);
 
 	which = (u_int)name[1];
 	if (which >= RLIM_NLIMITS)
 		return (EINVAL);
 
 	if (req->newptr != NULL && req->newlen != sizeof(rlim))
 		return (EINVAL);
 
 	/* Writers need debug rights on the target; readers only visibility. */
 	flags = PGET_HOLD | PGET_NOTWEXIT |
 	    (req->newptr != NULL ? PGET_CANDEBUG : PGET_CANSEE);
 	error = pget((pid_t)name[0], flags, &p);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Retrieve limit.  It is only fetched when the caller supplied an
 	 * old-value buffer.
 	 */
 	if (req->oldptr != NULL) {
 		PROC_LOCK(p);
 		lim_rlimit_proc(p, which, &rlim);
 		PROC_UNLOCK(p);
 	}
 	error = SYSCTL_OUT(req, &rlim, sizeof(rlim));
 
 	/*
 	 * Set limit, but only if the copy-out above succeeded.
 	 */
 	if (error == 0 && req->newptr != NULL) {
 		error = SYSCTL_IN(req, &rlim, sizeof(rlim));
 		if (error == 0)
 			error = kern_proc_setrlimit(curthread, p, which, &rlim);
 	}
 
 	PRELE(p);
 	return (error);
 }
 
 /*
  * Sysctl handler for kern.proc.ps_strings: report the sv_psstrings value
  * from the sysentvec of the process whose pid is name[0].  Exactly one
  * name element is accepted.  With COMPAT_FREEBSD32, a request flagged
  * SCTL_MASK32 receives a 32-bit value instead.
  */
 static int
 sysctl_kern_proc_ps_strings(SYSCTL_HANDLER_ARGS)
 {
 	vm_offset_t ps_strings;
 #ifdef COMPAT_FREEBSD32
 	uint32_t ps_strings32;
 #endif
 	struct proc *p;
 	u_int namelen;
 	int error, *name;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 #ifdef COMPAT_FREEBSD32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		/*
 		 * We return 0 if the 32 bit emulation request is for a 64 bit
 		 * process.
 		 */
 		if (SV_PROC_FLAG(p, SV_ILP32) != 0)
 			ps_strings32 = PTROUT(p->p_sysent->sv_psstrings);
 		else
 			ps_strings32 = 0;
 		PROC_UNLOCK(p);
 		return (SYSCTL_OUT(req, &ps_strings32, sizeof(ps_strings32)));
 	}
 #endif
 	/* Read the value under the process lock, copy it out unlocked. */
 	ps_strings = p->p_sysent->sv_psstrings;
 	PROC_UNLOCK(p);
 	return (SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings)));
 }
 
 /*
  * Sysctl handler for kern.proc.umask: report fd_cmask from the file
  * descriptor table of the process whose pid is name[0].  Exactly one name
  * element is accepted.
  */
 static int
 sysctl_kern_proc_umask(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	u_int namelen;
 	u_short fd_cmask;
 	int error, *name;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_WANTREAD, &p);
 	if (error != 0)
 		return (error);
 
 	/* Sample the mask under the shared filedesc lock. */
 	FILEDESC_SLOCK(p->p_fd);
 	fd_cmask = p->p_fd->fd_cmask;
 	FILEDESC_SUNLOCK(p->p_fd);
 	PRELE(p);
 
 	return (SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask)));
 }
 
 /*
  * Sysctl handler for kern.proc.osrel: read and optionally replace p_osrel
  * of the process whose pid is name[0].  A new value must be exactly
  * sizeof(int) bytes and non-negative, otherwise EINVAL is returned.
  * Setting requires PGET_CANDEBUG access to the target, reading only
  * PGET_CANSEE.
  */
 static int
 sysctl_kern_proc_osrel(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	u_int namelen;
 	int error, flags, *name, osrel;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	if (req->newptr != NULL && req->newlen != sizeof(osrel))
 		return (EINVAL);
 
 	/* Writers need debug rights on the target; readers only visibility. */
 	flags = PGET_HOLD | PGET_NOTWEXIT |
 	    (req->newptr != NULL ? PGET_CANDEBUG : PGET_CANSEE);
 	error = pget((pid_t)name[0], flags, &p);
 	if (error != 0)
 		return (error);
 
 	/* The old value is copied out first; a failure skips the update. */
 	error = SYSCTL_OUT(req, &p->p_osrel, sizeof(p->p_osrel));
 	if (error == 0 && req->newptr != NULL) {
 		error = SYSCTL_IN(req, &osrel, sizeof(osrel));
 		if (error == 0) {
 			if (osrel >= 0)
 				p->p_osrel = osrel;
 			else
 				error = EINVAL;
 		}
 	}
 
 	PRELE(p);
 	return (error);
 }
 
 /*
  * Sysctl handler for kern.proc.sigtramp: report the start and end of the
  * signal trampoline of the process whose pid is name[0], as described by
  * its sysentvec.  When sv_sigcode_base is non-zero the range is
  * [sv_sigcode_base, sv_sigcode_base + *sv_szsigcode); otherwise it is the
  * *sv_szsigcode bytes ending at sv_psstrings.  With COMPAT_FREEBSD32, a
  * request flagged SCTL_MASK32 receives a struct kinfo_sigtramp32, which is
  * left zeroed unless the target has SV_ILP32 set.
  */
 static int
 sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
 {
 	const struct sysentvec *sv;
 	struct kinfo_sigtramp kst;
 #ifdef COMPAT_FREEBSD32
 	struct kinfo_sigtramp32 kst32;
 #endif
 	struct proc *p;
 	u_int namelen;
 	int error, *name;
 
 	name = (int *)arg1;
 	namelen = arg2;
 	if (namelen != 1)
 		return (EINVAL);
 
 	error = pget((pid_t)name[0], PGET_CANDEBUG, &p);
 	if (error != 0)
 		return (error);
 	sv = p->p_sysent;
 #ifdef COMPAT_FREEBSD32
 	if ((req->flags & SCTL_MASK32) != 0) {
 		bzero(&kst32, sizeof(kst32));
 		if (SV_PROC_FLAG(p, SV_ILP32)) {
 			if (sv->sv_sigcode_base == 0) {
 				kst32.ksigtramp_start = sv->sv_psstrings -
 				    *sv->sv_szsigcode;
 				kst32.ksigtramp_end = sv->sv_psstrings;
 			} else {
 				kst32.ksigtramp_start = sv->sv_sigcode_base;
 				kst32.ksigtramp_end = sv->sv_sigcode_base +
 				    *sv->sv_szsigcode;
 			}
 		}
 		PROC_UNLOCK(p);
 		return (SYSCTL_OUT(req, &kst32, sizeof(kst32)));
 	}
 #endif
 	bzero(&kst, sizeof(kst));
 	if (sv->sv_sigcode_base == 0) {
 		kst.ksigtramp_start = (char *)sv->sv_psstrings -
 		    *sv->sv_szsigcode;
 		kst.ksigtramp_end = (char *)sv->sv_psstrings;
 	} else {
 		kst.ksigtramp_start = (char *)sv->sv_sigcode_base;
 		kst.ksigtramp_end = (char *)sv->sv_sigcode_base +
 		    *sv->sv_szsigcode;
 	}
 	PROC_UNLOCK(p);
 	return (SYSCTL_OUT(req, &kst, sizeof(kst)));
 }
 
 /*
  * Registration of the kern.proc sysctl tree.  Each entry below binds one
  * KERN_PROC_* selector to the handler that serves it.
  */
 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
 
 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT|
 	CTLFLAG_MPSAFE, 0, 0, sysctl_kern_proc, "S,proc",
 	"Return entire process table");
 
 /* Process-table selectors, all served by sysctl_kern_proc(). */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_GID, gid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RGID, rgid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SESSION, sid, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PROC, proc, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc, "Return process table, no threads");
 
 /* Per-process information nodes, each with a dedicated handler. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args,
 	CTLFLAG_RW | CTLFLAG_CAPWR | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_args, "Process argument list");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_ENV, env, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	sysctl_kern_proc_env, "Process environment");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_AUXV, auxv, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_auxv, "Process ELF auxiliary vector");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_pathname, "Process executable path");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SV_NAME, sv_name, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sv_name,
 	"Process syscall vector name (ABI type)");
 
 /*
  * The same process-table selectors with KERN_PROC_INC_THREAD or'ed into
  * the selector value; still served by sysctl_kern_proc().
  */
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_GID | KERN_PROC_INC_THREAD), gid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_INC_THREAD), pgrp_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RGID | KERN_PROC_INC_THREAD), rgid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_SESSION | KERN_PROC_INC_THREAD),
 	sid_td, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_INC_THREAD), tty_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_INC_THREAD), uid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_INC_THREAD), ruid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_INC_THREAD), pid_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc, "Process table");
 
 /*
  * KERN_PROC_PROC with KERN_PROC_INC_THREAD or'ed in: unlike the plain
  * "proc" node, this selector asks for threads, so the description says so.
  */
 static SYSCTL_NODE(_kern_proc, (KERN_PROC_PROC | KERN_PROC_INC_THREAD), proc_td,
 	CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_kern_proc,
 	"Return process table, including threads");
 
 /* Old-format vm map node, registered only when COMPAT_FREEBSD7 is defined. */
 #ifdef COMPAT_FREEBSD7
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OVMMAP, ovmmap, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_ovmmap, "Old Process vm map entries");
 #endif
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_VMMAP, vmmap, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_vmmap, "Process vm map entries");
 
 /* Registered under the same condition that guards the kstack handler. */
 #if defined(STACK) || defined(DDB)
 static SYSCTL_NODE(_kern_proc, KERN_PROC_KSTACK, kstack, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_kstack, "Process kernel stacks");
 #endif
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_GROUPS, groups, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_groups, "Process groups");
 
 /* Read-write node: the handler accepts a new limit via req->newptr. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_RLIMIT, rlimit, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_rlimit,
 	"Process resource limits");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_PS_STRINGS, ps_strings, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_ps_strings,
 	"Process ps_strings location");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_UMASK, umask, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_umask, "Process umask");
 
 /* Read-write node: the handler accepts a new osrel via req->newptr. */
 static SYSCTL_NODE(_kern_proc, KERN_PROC_OSREL, osrel, CTLFLAG_RW |
 	CTLFLAG_ANYBODY | CTLFLAG_MPSAFE, sysctl_kern_proc_osrel,
 	"Process binary osreldate");
 
 static SYSCTL_NODE(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD |
 	CTLFLAG_MPSAFE, sysctl_kern_proc_sigtramp,
 	"Process signal trampoline location");
 
 int allproc_gen;
 
 /*
  * Put every usermode process other than the caller into the
  * SINGLE_ALLPROC single-threading state.
  *
  * The current process is used as a cursor in the allproc list: it is moved
  * to the head and then stepped past each process it examines, so that
  * allproc_lock can be dropped around thread_single() without losing the
  * position.  The whole scan is repeated until a pass completes in which
  * nothing was newly stopped, no exiting or single-stopped process was
  * seen, no thread_single() call failed and allproc_gen did not change.
  */
 void
 stop_all_proc(void)
 {
 	struct proc *cp, *p;
 	int r, gen;
 	bool restart, seen_stopped, seen_exiting, stopped_some;
 
 	cp = curproc;
 	/*
 	 * stop_all_proc() assumes that all process which have
 	 * usermode must be stopped, except current process, for
 	 * obvious reasons.  Since other threads in the process
 	 * establishing global stop could unstop something, disable
 	 * calls from multithreaded processes as precaution.  The
 	 * service must not be user-callable anyway.
 	 */
 	KASSERT((cp->p_flag & P_HADTHREADS) == 0 ||
 	    (cp->p_flag & P_KTHREAD) != 0, ("mt stop_all_proc"));
 
 allproc_loop:
 	sx_xlock(&allproc_lock);
 	/* Remember the generation so a change during the scan is detected. */
 	gen = allproc_gen;
 	seen_exiting = seen_stopped = stopped_some = restart = false;
 	/* Start the cursor at the head of the list. */
 	LIST_REMOVE(cp, p_list);
 	LIST_INSERT_HEAD(&allproc, cp, p_list);
 	for (;;) {
 		p = LIST_NEXT(cp, p_list);
 		if (p == NULL)
 			break;
 		/* Advance the cursor past p before p is examined. */
 		LIST_REMOVE(cp, p_list);
 		LIST_INSERT_AFTER(p, cp, p_list);
 		PROC_LOCK(p);
 		/* Kernel, system and already totally stopped processes. */
 		if ((p->p_flag & (P_KTHREAD | P_SYSTEM |
 		    P_TOTAL_STOP)) != 0) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		if ((p->p_flag & P_WEXIT) != 0) {
 			seen_exiting = true;
 			PROC_UNLOCK(p);
 			continue;
 		}
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			/*
 			 * Stopped processes are tolerated when there
 			 * are no other processes which might continue
 			 * them.  P_STOPPED_SINGLE but not
 			 * P_TOTAL_STOP process still has at least one
 			 * thread running.
 			 */
 			seen_stopped = true;
 			PROC_UNLOCK(p);
 			continue;
 		}
 		/*
 		 * Hold p and drop allproc_lock for the duration of
 		 * thread_single(); the process lock stays held.
 		 */
 		_PHOLD(p);
 		sx_xunlock(&allproc_lock);
 		r = thread_single(p, SINGLE_ALLPROC);
 		if (r != 0)
 			restart = true;
 		else
 			stopped_some = true;
 		_PRELE(p);
 		PROC_UNLOCK(p);
 		sx_xlock(&allproc_lock);
 	}
 	/* Catch forked children we did not see in iteration. */
 	if (gen != allproc_gen)
 		restart = true;
 	sx_xunlock(&allproc_lock);
 	/* Any activity in this pass means another pass is needed. */
 	if (restart || stopped_some || seen_exiting || seen_stopped) {
 		kern_yield(PRI_USER);
 		goto allproc_loop;
 	}
 }
 
 /*
  * Counterpart of stop_all_proc(): walk the allproc list, using the current
  * process as a cursor, and call thread_single_end(SINGLE_ALLPROC) on every
  * process that has P_TOTAL_STOP set.  Processes without the flag are only
  * locked, checked and unlocked.
  */
 void
 resume_all_proc(void)
 {
 	struct proc *cp, *p;
 
 	cp = curproc;
 	sx_xlock(&allproc_lock);
 	/* Start the cursor at the head of the list. */
 	LIST_REMOVE(cp, p_list);
 	LIST_INSERT_HEAD(&allproc, cp, p_list);
 	for (;;) {
 		p = LIST_NEXT(cp, p_list);
 		if (p == NULL)
 			break;
 		/* Advance the cursor past p before p is examined. */
 		LIST_REMOVE(cp, p_list);
 		LIST_INSERT_AFTER(p, cp, p_list);
 		PROC_LOCK(p);
 		if ((p->p_flag & P_TOTAL_STOP) != 0) {
 			/*
 			 * allproc_lock is dropped around thread_single_end();
 			 * p is held and its process lock stays owned.
 			 */
 			sx_xunlock(&allproc_lock);
 			_PHOLD(p);
 			thread_single_end(p, SINGLE_ALLPROC);
 			_PRELE(p);
 			PROC_UNLOCK(p);
 			sx_xlock(&allproc_lock);
 		} else {
 			PROC_UNLOCK(p);
 		}
 	}
 	sx_xunlock(&allproc_lock);
 }
 
 #define	TOTAL_STOP_DEBUG	1
 #ifdef TOTAL_STOP_DEBUG
 volatile static int ap_resume;
 #include <sys/mount.h>
 
 /*
  * Handler for the debug.stop_all_proc sysctl.  Writing a non-zero integer
  * stops all processes and suspends the syncer, then spins until ap_resume
  * becomes non-zero, and finally resumes the syncer and all processes.
  * ap_resume is reset to zero on every invocation.  A read, or a write of
  * zero, has no further effect.
  */
 static int
 sysctl_debug_stop_all_proc(SYSCTL_HANDLER_ARGS)
 {
 	int error, val;
 
 	ap_resume = 0;
 	val = 0;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0)
 		return (error);
 	if (req->newptr == NULL)
 		return (0);
 	if (val == 0)
 		return (0);
 
 	stop_all_proc();
 	syncer_suspend();
 	/* Busy-wait until ap_resume is changed from outside this function. */
 	while (ap_resume == 0)
 		;
 	syncer_resume();
 	resume_all_proc();
 	return (0);
 }
 
 SYSCTL_PROC(_debug, OID_AUTO, stop_all_proc, CTLTYPE_INT | CTLFLAG_RW |
     CTLFLAG_MPSAFE, __DEVOLATILE(int *, &ap_resume), 0,
     sysctl_debug_stop_all_proc, "I",
     "");
 #endif
Index: head/sys/kern/kern_racct.c
===================================================================
--- head/sys/kern/kern_racct.c	(revision 292383)
+++ head/sys/kern/kern_racct.c	(revision 292384)
@@ -1,1316 +1,1317 @@
 /*-
  * Copyright (c) 2010 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Edward Tomasz Napierala under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/smp.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/umtx.h>
 #include <machine/smp.h>
 
 #ifdef RCTL
 #include <sys/rctl.h>
 #endif
 
 #ifdef RACCT
 
 FEATURE(racct, "Resource Accounting");
 
 /*
  * Do not block processes that have their %cpu usage <= pcpu_threshold.
  */
 static int pcpu_threshold = 1;
 #ifdef RACCT_DEFAULT_TO_DISABLED
 int racct_enable = 0;
 #else
 int racct_enable = 1;
 #endif
 
 SYSCTL_NODE(_kern, OID_AUTO, racct, CTLFLAG_RW, 0, "Resource Accounting");
 SYSCTL_UINT(_kern_racct, OID_AUTO, enable, CTLFLAG_RDTUN, &racct_enable,
     0, "Enable RACCT/RCTL");
 SYSCTL_UINT(_kern_racct, OID_AUTO, pcpu_threshold, CTLFLAG_RW, &pcpu_threshold,
     0, "Processes with higher %cpu usage than this value can be throttled.");
 
 /*
  * How many seconds it takes to use the scheduler %cpu calculations.  When a
  * process starts, we compute its %cpu usage by dividing its runtime by the
  * process wall clock time.  After RACCT_PCPU_SECS pass, we use the value
  * provided by the scheduler.
  */
 #define RACCT_PCPU_SECS		3
 
 static struct mtx racct_lock;
 MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
 
 static uma_zone_t racct_zone;
 
 static void racct_sub_racct(struct racct *dest, const struct racct *src);
 static void racct_sub_cred_locked(struct ucred *cred, int resource,
 		uint64_t amount);
 static void racct_add_cred_locked(struct ucred *cred, int resource,
 		uint64_t amount);
 
 SDT_PROVIDER_DEFINE(racct);
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add, "struct proc *", "int",
-    "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add__failure,
+SDT_PROBE_DEFINE3(racct, , rusage, add,
     "struct proc *", "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add__cred, "struct ucred *",
-    "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, add__force, "struct proc *",
-    "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, set, "struct proc *", "int",
-    "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, set__failure,
+SDT_PROBE_DEFINE3(racct, , rusage, add__failure,
     "struct proc *", "int", "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, "struct proc *", "int",
-    "uint64_t");
-SDT_PROBE_DEFINE3(racct, kernel, rusage, sub__cred, "struct ucred *",
-    "int", "uint64_t");
-SDT_PROBE_DEFINE1(racct, kernel, racct, create, "struct racct *");
-SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, join, "struct racct *",
+SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
+    "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, add__force,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, set,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, set__failure,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, sub,
+    "struct proc *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, sub__cred,
+    "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE1(racct, , racct, create,
     "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, join__failure,
-    "struct racct *", "struct racct *");
-SDT_PROBE_DEFINE2(racct, kernel, racct, leave, "struct racct *",
+SDT_PROBE_DEFINE1(racct, , racct, destroy,
     "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, join,
+    "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, join__failure,
+    "struct racct *", "struct racct *");
+SDT_PROBE_DEFINE2(racct, , racct, leave,
+    "struct racct *", "struct racct *");
 
 /*
  * Property flags of each accounted resource, indexed by the RACCT_*
  * resource identifier.  Presumably this is what the RACCT_IS_*() and
  * RACCT_CAN_DROP() macros used in this file consult -- confirm against
  * sys/racct.h.
  */
 int racct_types[] = {
 	[RACCT_CPU] =
 		RACCT_IN_MILLIONS,
 	[RACCT_DATA] =
 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
 	[RACCT_STACK] =
 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
 	[RACCT_CORE] =
 		RACCT_DENIABLE,
 	[RACCT_RSS] =
 		RACCT_RECLAIMABLE,
 	[RACCT_MEMLOCK] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
 	[RACCT_NPROC] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
 	[RACCT_NOFILE] =
 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
 	[RACCT_VMEM] =
 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
 	[RACCT_NPTS] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_SWAP] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_NTHR] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE,
 	[RACCT_MSGQQUEUED] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_MSGQSIZE] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_NMSGQ] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_NSEM] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_NSEMOP] =
 		RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
 	[RACCT_NSHM] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_SHMSIZE] =
 		RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
 	[RACCT_WALLCLOCK] =
 		RACCT_IN_MILLIONS,
 	[RACCT_PCTCPU] =
 		RACCT_DECAYING | RACCT_DENIABLE | RACCT_IN_MILLIONS };
 
 static const fixpt_t RACCT_DECAY_FACTOR = 0.3 * FSCALE;
 
 #ifdef SCHED_4BSD
 /*
  * Contains intermediate values for %cpu calculations to avoid using floating
  * point in the kernel.
  * ccpu_exp[k] = FSCALE * (ccpu/FSCALE)^k = FSCALE * exp(-k/20)
  * It is needed only for the 4BSD scheduler; in ULE ccpu is equal to zero,
  * so the calculations are more straightforward.
  */
 fixpt_t ccpu_exp[] = {
 	[0] = FSCALE * 1,
 	[1] = FSCALE * 0.95122942450071400909,
 	[2] = FSCALE * 0.90483741803595957316,
 	[3] = FSCALE * 0.86070797642505780722,
 	[4] = FSCALE * 0.81873075307798185866,
 	[5] = FSCALE * 0.77880078307140486824,
 	[6] = FSCALE * 0.74081822068171786606,
 	[7] = FSCALE * 0.70468808971871343435,
 	[8] = FSCALE * 0.67032004603563930074,
 	[9] = FSCALE * 0.63762815162177329314,
 	[10] = FSCALE * 0.60653065971263342360,
 	[11] = FSCALE * 0.57694981038048669531,
 	[12] = FSCALE * 0.54881163609402643262,
 	[13] = FSCALE * 0.52204577676101604789,
 	[14] = FSCALE * 0.49658530379140951470,
 	[15] = FSCALE * 0.47236655274101470713,
 	[16] = FSCALE * 0.44932896411722159143,
 	[17] = FSCALE * 0.42741493194872666992,
 	[18] = FSCALE * 0.40656965974059911188,
 	[19] = FSCALE * 0.38674102345450120691,
 	[20] = FSCALE * 0.36787944117144232159,
 	[21] = FSCALE * 0.34993774911115535467,
 	[22] = FSCALE * 0.33287108369807955328,
 	[23] = FSCALE * 0.31663676937905321821,
 	[24] = FSCALE * 0.30119421191220209664,
 	[25] = FSCALE * 0.28650479686019010032,
 	[26] = FSCALE * 0.27253179303401260312,
 	[27] = FSCALE * 0.25924026064589150757,
 	[28] = FSCALE * 0.24659696394160647693,
 	[29] = FSCALE * 0.23457028809379765313,
 	[30] = FSCALE * 0.22313016014842982893,
 	[31] = FSCALE * 0.21224797382674305771,
 	[32] = FSCALE * 0.20189651799465540848,
 	[33] = FSCALE * 0.19204990862075411423,
 	[34] = FSCALE * 0.18268352405273465022,
 	[35] = FSCALE * 0.17377394345044512668,
 	[36] = FSCALE * 0.16529888822158653829,
 	[37] = FSCALE * 0.15723716631362761621,
 	[38] = FSCALE * 0.14956861922263505264,
 	[39] = FSCALE * 0.14227407158651357185,
 	[40] = FSCALE * 0.13533528323661269189,
 	[41] = FSCALE * 0.12873490358780421886,
 	[42] = FSCALE * 0.12245642825298191021,
 	[43] = FSCALE * 0.11648415777349695786,
 	[44] = FSCALE * 0.11080315836233388333,
 	[45] = FSCALE * 0.10539922456186433678,
 	[46] = FSCALE * 0.10025884372280373372,
 	[47] = FSCALE * 0.09536916221554961888,
 	[48] = FSCALE * 0.09071795328941250337,
 	[49] = FSCALE * 0.08629358649937051097,
 	[50] = FSCALE * 0.08208499862389879516,
 	[51] = FSCALE * 0.07808166600115315231,
 	[52] = FSCALE * 0.07427357821433388042,
 	[53] = FSCALE * 0.07065121306042958674,
 	[54] = FSCALE * 0.06720551273974976512,
 	[55] = FSCALE * 0.06392786120670757270,
 	[56] = FSCALE * 0.06081006262521796499,
 	[57] = FSCALE * 0.05784432087483846296,
 	[58] = FSCALE * 0.05502322005640722902,
 	[59] = FSCALE * 0.05233970594843239308,
 	[60] = FSCALE * 0.04978706836786394297,
 	[61] = FSCALE * 0.04735892439114092119,
 	[62] = FSCALE * 0.04504920239355780606,
 	[63] = FSCALE * 0.04285212686704017991,
 	[64] = FSCALE * 0.04076220397836621516,
 	[65] = FSCALE * 0.03877420783172200988,
 	[66] = FSCALE * 0.03688316740124000544,
 	[67] = FSCALE * 0.03508435410084502588,
 	[68] = FSCALE * 0.03337326996032607948,
 	[69] = FSCALE * 0.03174563637806794323,
 	[70] = FSCALE * 0.03019738342231850073,
 	[71] = FSCALE * 0.02872463965423942912,
 	[72] = FSCALE * 0.02732372244729256080,
 	[73] = FSCALE * 0.02599112877875534358,
 	[74] = FSCALE * 0.02472352647033939120,
 	[75] = FSCALE * 0.02351774585600910823,
 	[76] = FSCALE * 0.02237077185616559577,
 	[77] = FSCALE * 0.02127973643837716938,
 	[78] = FSCALE * 0.02024191144580438847,
 	[79] = FSCALE * 0.01925470177538692429,
 	[80] = FSCALE * 0.01831563888873418029,
 	[81] = FSCALE * 0.01742237463949351138,
 	[82] = FSCALE * 0.01657267540176124754,
 	[83] = FSCALE * 0.01576441648485449082,
 	[84] = FSCALE * 0.01499557682047770621,
 	[85] = FSCALE * 0.01426423390899925527,
 	[86] = FSCALE * 0.01356855901220093175,
 	[87] = FSCALE * 0.01290681258047986886,
 	[88] = FSCALE * 0.01227733990306844117,
 	[89] = FSCALE * 0.01167856697039544521,
 	[90] = FSCALE * 0.01110899653824230649,
 	[91] = FSCALE * 0.01056720438385265337,
 	[92] = FSCALE * 0.01005183574463358164,
 	[93] = FSCALE * 0.00956160193054350793,
 	[94] = FSCALE * 0.00909527710169581709,
 	[95] = FSCALE * 0.00865169520312063417,
 	[96] = FSCALE * 0.00822974704902002884,
 	[97] = FSCALE * 0.00782837754922577143,
 	[98] = FSCALE * 0.00744658307092434051,
 	[99] = FSCALE * 0.00708340892905212004,
 	[100] = FSCALE * 0.00673794699908546709,
 	[101] = FSCALE * 0.00640933344625638184,
 	[102] = FSCALE * 0.00609674656551563610,
 	[103] = FSCALE * 0.00579940472684214321,
 	[104] = FSCALE * 0.00551656442076077241,
 	[105] = FSCALE * 0.00524751839918138427,
 	[106] = FSCALE * 0.00499159390691021621,
 	[107] = FSCALE * 0.00474815099941147558,
 	[108] = FSCALE * 0.00451658094261266798,
 	[109] = FSCALE * 0.00429630469075234057,
 	[110] = FSCALE * 0.00408677143846406699,
 };
 #endif
 
 #define	CCPU_EXP_MAX	110
 
 /*
  * Compute the %cpu usage of process 'p', scaled by 1000000.
  *
  * This function is analogous to getpcpu() in the ps(1) command; both
  * should calculate the value in the same way so that the racct %cpu
  * figures stay consistent with what ps(1) shows.  The computation is more
  * involved for the 4BSD scheduler because of the ccpu variable; in ULE it
  * is defined to be zero, which saves some work.
  *
  * 'pcpu' is the caller's own estimate; it is returned unchanged while the
  * process has been swapped in for less than RACCT_PCPU_SECS seconds.
  */
 static uint64_t
 racct_getpcpu(struct proc *p, u_int pcpu)
 {
 	struct thread *td;
 	fixpt_t p_pctcpu;
 	uint64_t scaled;
 	u_int swtime;
 #ifdef SCHED_4BSD
 	fixpt_t pctcpu, pctcpu_next;
 #endif
 #ifdef SMP
 	struct pcpu *pc;
 	int found;
 #endif
 
 	ASSERT_RACCT_ENABLED();
 
 	/*
 	 * A swapped-out process is counted as using no cpu at all, which
 	 * matches the behaviour of the userland ps(1) tool.
 	 */
 	if ((p->p_flag & P_INMEM) == 0)
 		return (0);
 
 	/*
 	 * sched_pctcpu() reports small values for short-lived processes
 	 * even when they are cpu intensive, so the caller's estimate is
 	 * used for those.
 	 */
 	swtime = (ticks - p->p_swtick) / hz;
 	if (swtime < RACCT_PCPU_SECS)
 		return (pcpu);
 
 	p_pctcpu = 0;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		/* Idle threads do not contribute. */
 		if (td == PCPU_GET(idlethread))
 			continue;
 #ifdef SMP
 		found = 0;
 		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			found = (td == pc->pc_idlethread);
 			if (found)
 				break;
 		}
 		if (found)
 			continue;
 #endif
 		thread_lock(td);
 #ifdef SCHED_4BSD
 		pctcpu = sched_pctcpu(td);
 		/* Count also the yet unfinished second. */
 		pctcpu_next = (pctcpu * ccpu_exp[1]) >> FSHIFT;
 		pctcpu_next += sched_pctcpu_delta(td);
 		p_pctcpu += max(pctcpu, pctcpu_next);
 #else
 		/*
 		 * In ULE the %cpu statistics are updated on every
 		 * sched_pctcpu() call.  So special calculations to
 		 * account for the latest (unfinished) second are
 		 * not needed.
 		 */
 		p_pctcpu += sched_pctcpu(td);
 #endif
 		thread_unlock(td);
 	}
 
 	/* Percent, in millionths, still carrying the FSCALE factor. */
 	scaled = 100 * (uint64_t)p_pctcpu * 1000000;
 #ifdef SCHED_4BSD
 	if (swtime <= CCPU_EXP_MAX)
 		return (scaled / (FSCALE - ccpu_exp[swtime]));
 #endif
 
 	return (scaled / FSCALE);
 }
 
 /*
  * Add every resource counter of 'src' to the matching counter of 'dest'.
  * The caller must hold racct_lock (asserted).  Both counters are asserted
  * to be non-negative before each addition.
  */
 static void
 racct_add_racct(struct racct *dest, const struct racct *src)
 {
 	int res;
 
 	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 
 	/*
 	 * Update resource usage in dest.
 	 */
 	res = 0;
 	while (res <= RACCT_MAX) {
 		KASSERT(dest->r_resources[res] >= 0,
 		    ("%s: resource %d propagation meltdown: dest < 0",
 		    __func__, res));
 		KASSERT(src->r_resources[res] >= 0,
 		    ("%s: resource %d propagation meltdown: src < 0",
 		    __func__, res));
 		dest->r_resources[res] += src->r_resources[res];
 		res++;
 	}
 }
 
 /*
  * Subtract the resource counters of 'src' from those of 'dest'.  The
  * caller must hold racct_lock (asserted).  Only resources for which
  * RACCT_CAN_DROP() is true are modified.  A result below zero is clamped
  * to zero, which is asserted to happen only for sloppy or decaying
  * resources.
  */
 static void
 racct_sub_racct(struct racct *dest, const struct racct *src)
 {
 	int res;
 
 	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 
 	/*
 	 * Update resource usage in dest.
 	 */
 	for (res = 0; res <= RACCT_MAX; res++) {
 		/* Strict sanity checks apply to exactly-tracked resources. */
 		if (!(RACCT_IS_SLOPPY(res) || RACCT_IS_DECAYING(res))) {
 			KASSERT(dest->r_resources[res] >= 0,
 			    ("%s: resource %d propagation meltdown: dest < 0",
 			    __func__, res));
 			KASSERT(src->r_resources[res] >= 0,
 			    ("%s: resource %d propagation meltdown: src < 0",
 			    __func__, res));
 			KASSERT(src->r_resources[res] <= dest->r_resources[res],
 			    ("%s: resource %d propagation meltdown: src > dest",
 			    __func__, res));
 		}
 		if (!RACCT_CAN_DROP(res))
 			continue;
 
 		dest->r_resources[res] -= src->r_resources[res];
 		if (dest->r_resources[res] >= 0)
 			continue;
 
 		KASSERT(RACCT_IS_SLOPPY(res) ||
 		    RACCT_IS_DECAYING(res),
 		    ("%s: resource %d usage < 0", __func__, res));
 		dest->r_resources[res] = 0;
 	}
 }
 
 /*
  * Allocate a zeroed struct racct from racct_zone and store it in *racctp.
  * Does nothing when racct_enable is zero.  *racctp must be NULL on entry
  * (asserted).
  */
 void
 racct_create(struct racct **racctp)
 {
 
 	if (!racct_enable)
 		return;
 
-	SDT_PROBE1(racct, kernel, racct, create, racctp);
+	SDT_PROBE1(racct, , racct, create, racctp);
 
 	KASSERT(*racctp == NULL, ("racct already allocated"));
 
 	*racctp = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
 }
 
 /*
  * Free *racctp back to racct_zone and set *racctp to NULL.  The caller
  * must hold racct_lock (asserted), and both racctp and *racctp must be
  * non-NULL (asserted).
  */
 static void
 racct_destroy_locked(struct racct **racctp)
 {
 	int i;
 	struct racct *racct;
 
 	ASSERT_RACCT_ENABLED();
 
-	SDT_PROBE1(racct, kernel, racct, destroy, racctp);
+	SDT_PROBE1(racct, , racct, destroy, racctp);
 
 	mtx_assert(&racct_lock, MA_OWNED);
 	KASSERT(racctp != NULL, ("NULL racctp"));
 	KASSERT(*racctp != NULL, ("NULL racct"));
 
 	racct = *racctp;
 
 	/*
 	 * Every reclaimable, non-sloppy resource is asserted to be back
 	 * at zero before the container is freed.
 	 */
 	for (i = 0; i <= RACCT_MAX; i++) {
 		if (RACCT_IS_SLOPPY(i))
 			continue;
 		if (!RACCT_IS_RECLAIMABLE(i))
 			continue;
 		KASSERT(racct->r_resources[i] == 0,
 		    ("destroying non-empty racct: "
 		    "%ju allocated for resource %d\n",
 		    racct->r_resources[i], i));
 	}
 	uma_zfree(racct_zone, racct);
 	*racctp = NULL;
 }
 
 /*
  * Locked wrapper around racct_destroy_locked(): takes racct_lock for the
  * duration of the call.  Does nothing when racct_enable is zero.
  */
 void
 racct_destroy(struct racct **racct)
 {
 
 	if (racct_enable) {
 		mtx_lock(&racct_lock);
 		racct_destroy_locked(racct);
 		mtx_unlock(&racct_lock);
 	}
 }
 
 /*
  * Apply a signed delta to the usage of 'resource' recorded in 'racct'.
  * Only this container is touched; parents are not updated.  Unlike the
  * other accounting entry points, 'amount' may be negative here.
  *
  * A result below zero is clamped to zero; this is only expected for
  * sloppy or decaying resources (asserted).  The racct_lock must be held.
  */
 static void
 racct_adjust_resource(struct racct *racct, int resource,
     int64_t amount)
 {
 	/* Upper bound for %cpu: 100% on each of the MAXCPU possible CPUs. */
 	const int64_t pctcpu_cap = 100 * 1000000 * (int64_t)MAXCPU;
 
 	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 	KASSERT(racct != NULL, ("NULL racct"));
 
 	racct->r_resources[resource] += amount;
 	if (racct->r_resources[resource] < 0) {
 		KASSERT(RACCT_IS_SLOPPY(resource) || RACCT_IS_DECAYING(resource),
 		    ("%s: resource %d usage < 0", __func__, resource));
 		racct->r_resources[resource] = 0;
 	}
 
 	if (resource != RACCT_PCTCPU)
 		return;
 
 	/*
 	 * The %cpu value of a racct can grow past 100% per core.  For
 	 * example, racct_proc_exit() adds the %cpu usage of the exiting
 	 * process to the ucred containers, so many processes terminating
 	 * in a short time span can inflate the ucred %cpu value.  The 4BSD
 	 * scheduler also sometimes reports more than 100% cpu usage for a
 	 * thread.  Clamp to 100% times the maximum number of CPUs.
 	 */
 	if (racct->r_resources[RACCT_PCTCPU] > pctcpu_cap)
 		racct->r_resources[RACCT_PCTCPU] = pctcpu_cap;
 }
 
 /*
  * Charge 'amount' of 'resource' to process 'p': first to the process's
  * own racct, then to the containers of its credential through
  * racct_add_cred_locked().
  *
  * With RCTL compiled in, rctl_enforce() is consulted first; if it reports
  * an error and the resource is deniable, nothing is charged and that
  * error is returned.  Otherwise returns 0.
  *
  * The proc lock must be held (asserted); racct_adjust_resource() asserts
  * that racct_lock is held as well.
  */
 static int
 racct_add_locked(struct proc *p, int resource, uint64_t amount)
 {
 #ifdef RCTL
 	int error;
 #endif
 
 	ASSERT_RACCT_ENABLED();
 
-	SDT_PROBE3(racct, kernel, rusage, add, p, resource, amount);
+	SDT_PROBE3(racct, , rusage, add, p, resource, amount);
 
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 #ifdef RCTL
 	error = rctl_enforce(p, resource, amount);
 	if (error && RACCT_IS_DENIABLE(resource)) {
-		SDT_PROBE3(racct, kernel, rusage, add__failure, p, resource,
-		    amount);
+		SDT_PROBE3(racct, , rusage, add__failure, p, resource, amount);
 		return (error);
 	}
 #endif
 	racct_adjust_resource(p->p_racct, resource, amount);
 	racct_add_cred_locked(p->p_ucred, resource, amount);
 
 	return (0);
 }
 
 /*
  * Charge 'amount' of 'resource' to process 'p', taking racct_lock around
  * racct_add_locked().
  *
  * Returns 0 if the allocation is within limits (or racct is disabled),
  * otherwise the errno value reported by racct_add_locked().
  */
 int
 racct_add(struct proc *p, int resource, uint64_t amount)
 {
 	int error = 0;
 
 	if (racct_enable) {
 		mtx_lock(&racct_lock);
 		error = racct_add_locked(p, resource, amount);
 		mtx_unlock(&racct_lock);
 	}
 	return (error);
 }
 
 /*
  * Add 'amount' of 'resource' to every container reachable from 'cred':
  * the real-uid uidinfo racct, the racct of the credential's prison and
  * of each of its parent prisons, and the login class racct.
  * racct_adjust_resource() asserts that racct_lock is held.
  */
 static void
 racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
 {
 	struct prison *pr;
 
 	ASSERT_RACCT_ENABLED();
 
-	SDT_PROBE3(racct, kernel, rusage, add__cred, cred, resource, amount);
+	SDT_PROBE3(racct, , rusage, add__cred, cred, resource, amount);
 
 	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, amount);
 	/* Walk from the credential's prison up through all of its parents. */
 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
 		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
 		    amount);
 	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, amount);
 }
 
 /*
  * Charge 'amount' of 'resource' to the containers of credential 'cred',
  * taking racct_lock around racct_add_cred_locked().  No limits are
  * checked and the call cannot fail.
  *
  * XXX: Shouldn't this ever return an error?
  */
 void
 racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
 {
 
 	if (racct_enable) {
 		mtx_lock(&racct_lock);
 		racct_add_cred_locked(cred, resource, amount);
 		mtx_unlock(&racct_lock);
 	}
 }
 
 /*
  * Increase allocation of 'resource' by 'amount' for process 'p'.
  * Doesn't check for limits and never fails.
  *
  * Unlike racct_add(), rctl_enforce() is not called here.  The caller
  * must hold the proc lock (asserted); racct_lock is taken internally.
  */
 void
 racct_add_force(struct proc *p, int resource, uint64_t amount)
 {
 
 	if (!racct_enable)
 		return;
 
-	SDT_PROBE3(racct, kernel, rusage, add__force, p, resource, amount);
+	SDT_PROBE3(racct, , rusage, add__force, p, resource, amount);
 
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	mtx_lock(&racct_lock);
 	/* Charge the process itself, then its credential's containers. */
 	racct_adjust_resource(p->p_racct, resource, amount);
 	racct_add_cred_locked(p->p_ucred, resource, amount);
 	mtx_unlock(&racct_lock);
 }
 
 /*
  * Set the usage of 'resource' for process 'p' to 'amount' and propagate
  * the change to the containers of its credential.
  *
  * With RCTL compiled in, rctl_enforce() is consulted only when the
  * process's usage grows; if it reports an error and the resource is
  * deniable, nothing is changed and that error is returned.  Otherwise
  * returns 0.
  *
  * The proc lock must be held (asserted); racct_adjust_resource() asserts
  * that racct_lock is held as well.
  */
 static int
 racct_set_locked(struct proc *p, int resource, uint64_t amount)
 {
 	int64_t old_amount, decayed_amount;
 	int64_t diff_proc, diff_cred;
 #ifdef RCTL
 	int error;
 #endif
 
 	ASSERT_RACCT_ENABLED();
 
-	SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount);
+	SDT_PROBE3(racct, , rusage, set, p, resource, amount);
 
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	old_amount = p->p_racct->r_resources[resource];
 	/*
 	 * The diffs may be negative.
 	 */
 	diff_proc = amount - old_amount;
 	if (RACCT_IS_DECAYING(resource)) {
 		/*
 		 * Resources in per-credential racct containers may decay.
 		 * If this is the case, we need to calculate the difference
 		 * between the new amount and the proportional value of the
 		 * old amount that has decayed in the ucred racct containers.
 		 */
 		decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
 		diff_cred = amount - decayed_amount;
 	} else
 		diff_cred = diff_proc;
 #ifdef notyet
 	KASSERT(diff_proc >= 0 || RACCT_CAN_DROP(resource),
 	    ("%s: usage of non-droppable resource %d dropping", __func__,
 	     resource));
 #endif
 #ifdef RCTL
 	/* Limits are only enforced when the process's usage increases. */
 	if (diff_proc > 0) {
 		error = rctl_enforce(p, resource, diff_proc);
 		if (error && RACCT_IS_DENIABLE(resource)) {
-			SDT_PROBE3(racct, kernel, rusage, set__failure, p,
-			    resource, amount);
+			SDT_PROBE3(racct, , rusage, set__failure, p, resource,
+			    amount);
 			return (error);
 		}
 	}
 #endif
 	racct_adjust_resource(p->p_racct, resource, diff_proc);
 	/*
 	 * The credential helpers take an unsigned amount, so the sign of
 	 * the difference selects between adding and subtracting.
 	 */
 	if (diff_cred > 0)
 		racct_add_cred_locked(p->p_ucred, resource, diff_cred);
 	else if (diff_cred < 0)
 		racct_sub_cred_locked(p->p_ucred, resource, -diff_cred);
 
 	return (0);
 }
 
 /*
  * Set the allocation of 'resource' for process 'p' to 'amount', taking
  * racct_lock around racct_set_locked().
  *
  * Returns 0 if the new amount is within limits (or racct is disabled),
  * otherwise an errno value.  Decreasing the allocation always returns 0,
  * even if the result is still above the limit.
  */
 int
 racct_set(struct proc *p, int resource, uint64_t amount)
 {
 	int error = 0;
 
 	if (racct_enable) {
 		mtx_lock(&racct_lock);
 		error = racct_set_locked(p, resource, amount);
 		mtx_unlock(&racct_lock);
 	}
 	return (error);
 }
 
 /*
  * Same bookkeeping as racct_set_locked(), but without any call to
  * rctl_enforce(): set the usage of 'resource' for process 'p' to 'amount'
  * and propagate the change to the containers of its credential.  Cannot
  * fail.  Note that it fires the same "set" probe as racct_set_locked().
  *
  * The proc lock must be held (asserted); racct_adjust_resource() asserts
  * that racct_lock is held as well.
  */
 static void
 racct_set_force_locked(struct proc *p, int resource, uint64_t amount)
 {
 	int64_t old_amount, decayed_amount;
 	int64_t diff_proc, diff_cred;
 
 	ASSERT_RACCT_ENABLED();
 
-	SDT_PROBE3(racct, kernel, rusage, set, p, resource, amount);
+	SDT_PROBE3(racct, , rusage, set, p, resource, amount);
 
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	old_amount = p->p_racct->r_resources[resource];
 	/*
 	 * The diffs may be negative.
 	 */
 	diff_proc = amount - old_amount;
 	if (RACCT_IS_DECAYING(resource)) {
 		/*
 		 * Resources in per-credential racct containers may decay.
 		 * If this is the case, we need to calculate the difference
 		 * between the new amount and the proportional value of the
 		 * old amount that has decayed in the ucred racct containers.
 		 */
 		decayed_amount = old_amount * RACCT_DECAY_FACTOR / FSCALE;
 		diff_cred = amount - decayed_amount;
 	} else
 		diff_cred = diff_proc;
 
 	racct_adjust_resource(p->p_racct, resource, diff_proc);
 	/*
 	 * The credential helpers take an unsigned amount, so the sign of
 	 * the difference selects between adding and subtracting.
 	 */
 	if (diff_cred > 0)
 		racct_add_cred_locked(p->p_ucred, resource, diff_cred);
 	else if (diff_cred < 0)
 		racct_sub_cred_locked(p->p_ucred, resource, -diff_cred);
 }
 
 /*
  * Set the allocation of 'resource' for process 'p' to 'amount' without
  * consulting limits, taking racct_lock around racct_set_force_locked().
  * Does nothing when racct_enable is not set.
  */
 void
 racct_set_force(struct proc *p, int resource, uint64_t amount)
 {
 
 	if (racct_enable) {
 		mtx_lock(&racct_lock);
 		racct_set_force_locked(p, resource, amount);
 		mtx_unlock(&racct_lock);
 	}
 }
 
 /*
  * Report how much of 'resource' process 'p' may keep allocated.  Going
  * beyond that amount would be denied unless the resource is marked
  * undeniable.  What the process has already allocated is not taken into
  * account.
  *
  * Returns UINT64_MAX when racct is disabled or RCTL is not compiled in;
  * otherwise the value comes from rctl_get_limit().
  */
 uint64_t
 racct_get_limit(struct proc *p, int resource)
 {
 
 #ifdef RCTL
 	if (racct_enable)
 		return (rctl_get_limit(p, resource));
 #endif
 	return (UINT64_MAX);
 }
 
 /*
  * Report how much of 'resource' process 'p' may keep allocated.  Going
  * beyond that amount would be denied unless the resource is marked
  * undeniable.  Unlike racct_get_limit(), what the process has already
  * allocated is taken into account.
  *
  * Returns UINT64_MAX when racct is disabled or RCTL is not compiled in;
  * otherwise the value comes from rctl_get_available().
  */
 uint64_t
 racct_get_available(struct proc *p, int resource)
 {
 
 #ifdef RCTL
 	if (racct_enable)
 		return (rctl_get_available(p, resource));
 #endif
 	return (UINT64_MAX);
 }
 
 /*
  * Report how much more %cpu process 'p' may add to its utilization
  * before it gets throttled.  Without RCTL there is no limit and
  * INT64_MAX is returned; with RCTL the value comes from
  * rctl_pcpu_available().
  */
 static int64_t
 racct_pcpu_available(struct proc *p)
 {
 	int64_t headroom;
 
 	ASSERT_RACCT_ENABLED();
 
 #ifdef RCTL
 	headroom = rctl_pcpu_available(p);
 #else
 	headroom = INT64_MAX;
 #endif
 	return (headroom);
 }
 
 /*
  * Decrease allocation of 'resource' by 'amount' for process 'p'.
  *
  * The caller must hold the proc lock (asserted); racct_lock is taken
  * internally.  'resource' must be droppable, and 'amount' must not exceed
  * what the process currently has allocated (both asserted).
  */
 void
 racct_sub(struct proc *p, int resource, uint64_t amount)
 {
 
 	if (!racct_enable)
 		return;
 
-	SDT_PROBE3(racct, kernel, rusage, sub, p, resource, amount);
+	SDT_PROBE3(racct, , rusage, sub, p, resource, amount);
 
 	/*
 	 * We need proc lock to dereference p->p_ucred.
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(RACCT_CAN_DROP(resource),
 	    ("%s: called for non-droppable resource %d", __func__, resource));
 
 	mtx_lock(&racct_lock);
 	KASSERT(amount <= p->p_racct->r_resources[resource],
 	    ("%s: freeing %ju of resource %d, which is more "
 	     "than allocated %jd for %s (pid %d)", __func__, amount, resource,
 	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));
 
 	/*
 	 * 'amount' is unsigned; its negation is passed to an int64_t
 	 * parameter, where it acts as a negative delta.
 	 */
 	racct_adjust_resource(p->p_racct, resource, -amount);
 	racct_sub_cred_locked(p->p_ucred, resource, amount);
 	mtx_unlock(&racct_lock);
 }
 
 /*
  * Subtract 'amount' of 'resource' from every container reachable from
  * 'cred': the real-uid uidinfo racct, the racct of the credential's
  * prison and of each of its parent prisons, and the login class racct.
  * racct_adjust_resource() asserts that racct_lock is held.
  */
 static void
 racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
 {
 	struct prison *pr;
 
 	ASSERT_RACCT_ENABLED();
 
-	SDT_PROBE3(racct, kernel, rusage, sub__cred, cred, resource, amount);
+	SDT_PROBE3(racct, , rusage, sub__cred, cred, resource, amount);
 
 #ifdef notyet
 	KASSERT(RACCT_CAN_DROP(resource),
 	    ("%s: called for resource %d which can not drop", __func__,
 	     resource));
 #endif
 
 	/*
 	 * 'amount' is unsigned; its negation is passed to an int64_t
 	 * parameter, where it acts as a negative delta.
 	 */
 	racct_adjust_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);
 	for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent)
 		racct_adjust_resource(pr->pr_prison_racct->prr_racct, resource,
 		    -amount);
 	racct_adjust_resource(cred->cr_loginclass->lc_racct, resource, -amount);
 }
 
 /*
  * Release 'amount' of 'resource' from the containers of credential
  * 'cred', taking racct_lock around racct_sub_cred_locked().  Does
  * nothing when racct_enable is not set.
  */
 void
 racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
 {
 
 	if (racct_enable) {
 		mtx_lock(&racct_lock);
 		racct_sub_cred_locked(cred, resource, amount);
 		mtx_unlock(&racct_lock);
 	}
 }
 
 /*
  * Set up resource accounting for a newly forked process.
  *
  * Creates the child's racct, lets RCTL (when compiled in) process the
  * fork, copies every inheritable, non-zero resource usage from the
  * parent, and charges the child for one process and one thread.
  *
  * Returns 0 on success or when racct is disabled.  On failure a single
  * errno value is returned and the child's accounting state is torn down
  * again with racct_proc_exit().
  */
 int
 racct_proc_fork(struct proc *parent, struct proc *child)
 {
 	int i, error = 0, error2;
 
 	if (!racct_enable)
 		return (0);
 
 	/*
 	 * Create racct for the child process.
 	 */
 	racct_create(&child->p_racct);
 
 	PROC_LOCK(parent);
 	PROC_LOCK(child);
 	mtx_lock(&racct_lock);
 
 #ifdef RCTL
 	error = rctl_proc_fork(parent, child);
 	if (error != 0)
 		goto out;
 #endif
 
 	/* Init process cpu time. */
 	child->p_prev_runtime = 0;
 	child->p_throttled = 0;
 
 	/*
 	 * Inherit resource usage.
 	 */
 	for (i = 0; i <= RACCT_MAX; i++) {
 		if (parent->p_racct->r_resources[i] == 0 ||
 		    !RACCT_IS_INHERITABLE(i))
 			continue;
 
 		error = racct_set_locked(child, i,
 		    parent->p_racct->r_resources[i]);
 		if (error != 0)
 			goto out;
 	}
 
 	/*
 	 * Both charges are always attempted, as before.  Report the first
 	 * failure instead of adding the two return values together: the
 	 * sum of two errno values is not a meaningful error code.
 	 */
 	error = racct_add_locked(child, RACCT_NPROC, 1);
 	error2 = racct_add_locked(child, RACCT_NTHR, 1);
 	if (error == 0)
 		error = error2;
 
 out:
 	mtx_unlock(&racct_lock);
 	PROC_UNLOCK(child);
 	PROC_UNLOCK(parent);
 
 	/* Undo the partial setup; this also destroys the child's racct. */
 	if (error != 0)
 		racct_proc_exit(child);
 
 	return (error);
 }
 
 /*
  * Final fork hook, called at the end of fork1() so that rules which need
  * a fully initialized process can be handled.  Calls rctl_enforce() for
  * RACCT_NPROC and RACCT_NTHR with an amount of zero.  Compiles to an
  * empty function without RCTL.
  */
 void
 racct_proc_fork_done(struct proc *child)
 {
 
 #ifdef RCTL
 	if (racct_enable) {
 		PROC_LOCK(child);
 		mtx_lock(&racct_lock);
 		rctl_enforce(child, RACCT_NPROC, 0);
 		rctl_enforce(child, RACCT_NTHR, 0);
 		mtx_unlock(&racct_lock);
 		PROC_UNLOCK(child);
 	}
 #endif
 }
 
 /*
  * Final accounting for process 'p': record its CPU time, add its %cpu
  * value to the containers of its credential, set every non-zero
  * reclaimable resource to zero, and destroy its racct.  Does nothing
  * when racct_enable is not set.
  */
 void
 racct_proc_exit(struct proc *p)
 {
 	int i;
 	uint64_t runtime;
 	struct timeval wallclock;
 	uint64_t pct_estimate, pct;
 
 	if (!racct_enable)
 		return;
 
 	PROC_LOCK(p);
 	/*
 	 * We don't need to calculate rux, proc_reap() has already done this.
 	 */
 	runtime = cputick2usec(p->p_rux.rux_runtime);
 #ifdef notyet
 	KASSERT(runtime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
 #else
 	/* Never let the recorded runtime go backwards. */
 	if (runtime < p->p_prev_runtime)
 		runtime = p->p_prev_runtime;
 #endif
 	/* Time elapsed since p_start, measured on the uptime clock. */
 	microuptime(&wallclock);
 	timevalsub(&wallclock, &p->p_stats->p_start);
 	/*
 	 * NOTE(review): 1000000 * runtime * 100 is evaluated in uint64_t
 	 * and wraps once runtime exceeds roughly 1.8e11; confirm whether
 	 * that matters for long-running processes.
 	 */
 	if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
 		pct_estimate = (1000000 * runtime * 100) /
 		    ((uint64_t)wallclock.tv_sec * 1000000 +
 		    wallclock.tv_usec);
 	} else
 		pct_estimate = 0;
 	pct = racct_getpcpu(p, pct_estimate);
 
 	mtx_lock(&racct_lock);
 	racct_set_locked(p, RACCT_CPU, runtime);
 	racct_add_cred_locked(p->p_ucred, RACCT_PCTCPU, pct);
 
 	/* Give back everything reclaimable that is still accounted. */
 	for (i = 0; i <= RACCT_MAX; i++) {
 		if (p->p_racct->r_resources[i] == 0)
 			continue;
 	    	if (!RACCT_IS_RECLAIMABLE(i))
 			continue;
 		racct_set_locked(p, i, 0);
 	}
 
 	mtx_unlock(&racct_lock);
 	PROC_UNLOCK(p);
 
 #ifdef RCTL
 	rctl_racct_release(p->p_racct);
 #endif
 	racct_destroy(&p->p_racct);
 }
 
 /*
  * Credential-change hook: move the resource usage of process 'p' out of
  * the containers of 'oldcred' and into those of 'newcred'.  Only
  * containers that actually differ between the two credentials are
  * touched.  The proc lock must not be held by the caller (asserted).
  */
 void
 racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
     struct ucred *newcred)
 {
 	struct uidinfo *from_ui, *to_ui;
 	struct loginclass *from_lc, *to_lc;
 	struct prison *from_pr, *to_pr, *walk;
 
 	if (!racct_enable)
 		return;
 
 	PROC_LOCK_ASSERT(p, MA_NOTOWNED);
 
 	to_ui = newcred->cr_ruidinfo;
 	from_ui = oldcred->cr_ruidinfo;
 	to_lc = newcred->cr_loginclass;
 	from_lc = oldcred->cr_loginclass;
 	to_pr = newcred->cr_prison;
 	from_pr = oldcred->cr_prison;
 
 	mtx_lock(&racct_lock);
 
 	/* Per-user container. */
 	if (to_ui != from_ui) {
 		racct_sub_racct(from_ui->ui_racct, p->p_racct);
 		racct_add_racct(to_ui->ui_racct, p->p_racct);
 	}
 
 	/* Login class container. */
 	if (to_lc != from_lc) {
 		racct_sub_racct(from_lc->lc_racct, p->p_racct);
 		racct_add_racct(to_lc->lc_racct, p->p_racct);
 	}
 
 	/* Prison containers, including every parent prison on both sides. */
 	if (to_pr != from_pr) {
 		for (walk = from_pr; walk != NULL; walk = walk->pr_parent)
 			racct_sub_racct(walk->pr_prison_racct->prr_racct,
 			    p->p_racct);
 		for (walk = to_pr; walk != NULL; walk = walk->pr_parent)
 			racct_add_racct(walk->pr_prison_racct->prr_racct,
 			    p->p_racct);
 	}
 
 	mtx_unlock(&racct_lock);
 
 #ifdef RCTL
 	rctl_proc_ucred_changed(p, newcred);
 #endif
 }
 
 /*
  * Transfer the resource usage recorded in 'src' to 'dest': 'src' is
  * added to 'dest' and then subtracted from itself, all under racct_lock.
  */
 void
 racct_move(struct racct *dest, struct racct *src)
 {
 
 	ASSERT_RACCT_ENABLED();
 
 	mtx_lock(&racct_lock);
 
 	racct_add_racct(dest, src);
 	racct_sub_racct(src, src);
 
 	mtx_unlock(&racct_lock);
 }
 
 /*
  * Mark process 'p' as throttled and ask each of its runnable or running
  * threads to reschedule.  Kernel processes and processes whose %cpu
  * usage is at or below pcpu_threshold are left alone.  The proc lock
  * must be held (asserted).
  */
 static void
 racct_proc_throttle(struct proc *p)
 {
 	struct thread *td;
 #ifdef SMP
 	int cpuid;
 #endif
 
 	ASSERT_RACCT_ENABLED();
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* Do not block kernel processes. */
 	if ((p->p_flag & (P_SYSTEM | P_KTHREAD)) != 0)
 		return;
 	/* Spare processes with low %cpu utilization to improve interactivity. */
 	if (p->p_racct->r_resources[RACCT_PCTCPU] <= pcpu_threshold)
 		return;
 	p->p_throttled = 1;
 
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		if (td->td_state == TDS_RUNQ) {
 			/*
 			 * A thread sitting on the scheduler run-queue cannot
 			 * simply be pulled off it.  Flag it instead, so that
 			 * it is taken off the cpu as soon as possible once it
 			 * starts running.
 			 */
 			td->td_flags |= TDF_NEEDRESCHED;
 		} else if (td->td_state == TDS_RUNNING) {
 			/*
 			 * A running thread is asked for a context switch
 			 * through the same flag.
 			 */
 			td->td_flags |= TDF_NEEDRESCHED;
 #ifdef SMP
 			cpuid = td->td_oncpu;
 			if (cpuid != NOCPU && td != curthread)
 				ipi_cpu(cpuid, IPI_AST);
 #endif
 		}
 		thread_unlock(td);
 	}
 }
 
 /*
  * Clear the throttled state of process 'p', if set, and call wakeup()
  * on its racct.  The proc lock must be held (asserted).
  */
 static void
 racct_proc_wakeup(struct proc *p)
 {
 
 	ASSERT_RACCT_ENABLED();
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (!p->p_throttled)
 		return;
 
 	p->p_throttled = 0;
 	wakeup(p->p_racct);
 }
 
 /*
  * Per-container callback used by racct_decay(): scale the usage of one
  * resource in 'racct' by RACCT_DECAY_FACTOR / FSCALE.  'res' points to
  * an int holding the resource index; 'dummy' is unused.  The racct_lock
  * must be held (asserted).
  */
 static void
 racct_decay_resource(struct racct *racct, void * res, void* dummy)
 {
 	int which;
 	int64_t before, after;
 
 	ASSERT_RACCT_ENABLED();
 	mtx_assert(&racct_lock, MA_OWNED);
 
 	which = *(int *)res;
 	before = racct->r_resources[which];
 
 	/* Only a positive usage can decay. */
 	if (before > 0) {
 		after = before * RACCT_DECAY_FACTOR / FSCALE;
 		racct->r_resources[which] = after;
 	}
 }
 
 /*
  * Callback passed to the *_racct_foreach() iterators by racct_decay();
  * acquires racct_lock.
  */
 static void
 racct_decay_pre(void)
 {
 
 	mtx_lock(&racct_lock);
 }
 
 /*
  * Callback passed to the *_racct_foreach() iterators by racct_decay();
  * releases racct_lock.
  */
 static void
 racct_decay_post(void)
 {
 
 	mtx_unlock(&racct_lock);
 }
 
 /*
  * Decay the usage of 'resource' in the uidinfo, login class and prison
  * raccts by handing racct_decay_resource() to each of the three
  * *_racct_foreach() iterators, with racct_decay_pre()/racct_decay_post()
  * as the accompanying lock and unlock callbacks.
  */
 static void
 racct_decay(int resource)
 {
 
 	ASSERT_RACCT_ENABLED();
 
 	/* The address of the local 'resource' is the callback argument. */
 	ui_racct_foreach(racct_decay_resource, racct_decay_pre,
 	    racct_decay_post, &resource, NULL);
 	loginclass_racct_foreach(racct_decay_resource, racct_decay_pre,
 	    racct_decay_post, &resource, NULL);
 	prison_racct_foreach(racct_decay_resource, racct_decay_pre,
 	    racct_decay_post, &resource, NULL);
 }
 
 /*
  * Body of the "racctd" kernel process.  Each iteration of the endless
  * loop:
  *  1. decays RACCT_PCTCPU in the credential containers;
  *  2. sets RACCT_PCTCPU to zero for every process on the zombproc list;
  *  3. refreshes %cpu, CPU time and wallclock usage of every process in
  *     the PRS_NORMAL state;
  *  4. walks the processes again to throttle those without %cpu headroom
  *     and wake up throttled ones that have headroom again;
  *  5. calls pause() with a timeout of hz.
  * Steps 2-4 run with allproc_lock held shared.
  */
 static void
 racctd(void)
 {
 	struct thread *td;
 	struct proc *p;
 	struct timeval wallclock;
 	uint64_t runtime;
 	uint64_t pct, pct_estimate;
 
 	ASSERT_RACCT_ENABLED();
 
 	for (;;) {
 		racct_decay(RACCT_PCTCPU);
 
 		sx_slock(&allproc_lock);
 
 		LIST_FOREACH(p, &zombproc, p_list) {
 			PROC_LOCK(p);
 			racct_set(p, RACCT_PCTCPU, 0);
 			PROC_UNLOCK(p);
 		}
 
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (p->p_state != PRS_NORMAL) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 
 			/* Time elapsed since p_start on the uptime clock. */
 			microuptime(&wallclock);
 			timevalsub(&wallclock, &p->p_stats->p_start);
 			/* Fold per-thread usage into p_rux before reading it. */
 			PROC_STATLOCK(p);
 			FOREACH_THREAD_IN_PROC(p, td)
 				ruxagg(p, td);
 			runtime = cputick2usec(p->p_rux.rux_runtime);
 			PROC_STATUNLOCK(p);
 #ifdef notyet
 			KASSERT(runtime >= p->p_prev_runtime,
 			    ("runtime < p_prev_runtime"));
 #else
 			/* Never let the recorded runtime go backwards. */
 			if (runtime < p->p_prev_runtime)
 				runtime = p->p_prev_runtime;
 #endif
 			p->p_prev_runtime = runtime;
 			/*
 			 * NOTE(review): 1000000 * runtime * 100 is evaluated
 			 * in uint64_t and wraps once runtime exceeds roughly
 			 * 1.8e11; confirm whether that matters here.
 			 */
 			if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
 				pct_estimate = (1000000 * runtime * 100) /
 				    ((uint64_t)wallclock.tv_sec * 1000000 +
 				    wallclock.tv_usec);
 			} else
 				pct_estimate = 0;
 			pct = racct_getpcpu(p, pct_estimate);
 			mtx_lock(&racct_lock);
 			/* %cpu is set without limit checks; the others are not. */
 			racct_set_force_locked(p, RACCT_PCTCPU, pct);
 			racct_set_locked(p, RACCT_CPU, runtime);
 			racct_set_locked(p, RACCT_WALLCLOCK,
 			    (uint64_t)wallclock.tv_sec * 1000000 +
 			    wallclock.tv_usec);
 			mtx_unlock(&racct_lock);
 			PROC_UNLOCK(p);
 		}
 
 		/*
 		 * To ensure that processes are throttled in a fair way, we need
 		 * to iterate over all processes again and check the limits
 		 * for %cpu resource only after ucred racct containers have been
 		 * properly filled.
 		 */
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (p->p_state != PRS_NORMAL) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 
 			if (racct_pcpu_available(p) <= 0)
 				racct_proc_throttle(p);
 			else if (p->p_throttled)
 				racct_proc_wakeup(p);
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
 		pause("-", hz);
 	}
 }
 
 /*
  * Descriptor handed to kproc_start() by racctd_init(): the name "racctd",
  * the racctd() entry point, and a NULL third member.
  */
 static struct kproc_desc racctd_kp = {
 	"racctd",
 	racctd,
 	NULL
 };
 
 /*
  * SYSINIT hook: start the racctd kernel process, but only when
  * racct_enable is set.
  */
 static void
 racctd_init(void)
 {
 	if (racct_enable)
 		kproc_start(&racctd_kp);
 }
 SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);
 
 /*
  * SYSINIT hook: when racct_enable is set, create the UMA zone that
  * racct structures are allocated from and attach the prison racct
  * looked up under the name "0" to prison0.
  */
 static void
 racct_init(void)
 {
 	if (!racct_enable)
 		return;
 
 	racct_zone = uma_zcreate("racct", sizeof(struct racct),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	/*
 	 * XXX: Move this somewhere.
 	 */
 	prison0.pr_prison_racct = prison_racct_find("0");
 }
 SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
 
 #endif /* !RACCT */
Index: head/sys/kern/kern_sig.c
===================================================================
--- head/sys/kern/kern_sig.c	(revision 292383)
+++ head/sys/kern/kern_sig.c	(revision 292384)
@@ -1,3575 +1,3575 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_sig.c	8.7 (Berkeley) 4/18/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_gzio.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/ctype.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/acct.h>
 #include <sys/bus.h>
 #include <sys/capsicum.h>
 #include <sys/condvar.h>
 #include <sys/event.h>
 #include <sys/fcntl.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/ktrace.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/procdesc.h>
 #include <sys/posix4.h>
 #include <sys/pioctl.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sdt.h>
 #include <sys/sbuf.h>
 #include <sys/sleepqueue.h>
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/timers.h>
 #include <sys/unistd.h>
 #include <sys/wait.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 #include <sys/jail.h>
 
 #include <machine/cpu.h>
 
 #include <security/audit/audit.h>
 
 #define	ONSIG	32		/* NSIG for osig* syscalls.  XXX. */
 
 /*
  * Statically defined tracing probes of the "proc" provider for signal
  * events (send, clear, discard).  The trailing string arguments name the
  * types of the probe arguments.
  */
 SDT_PROVIDER_DECLARE(proc);
-SDT_PROBE_DEFINE3(proc, kernel, , signal__send, "struct thread *",
-    "struct proc *", "int");
-SDT_PROBE_DEFINE2(proc, kernel, , signal__clear, "int",
-    "ksiginfo_t *");
-SDT_PROBE_DEFINE3(proc, kernel, , signal__discard,
+SDT_PROBE_DEFINE3(proc, , , signal__send,
     "struct thread *", "struct proc *", "int");
+SDT_PROBE_DEFINE2(proc, , , signal__clear,
+    "int", "ksiginfo_t *");
+SDT_PROBE_DEFINE3(proc, , , signal__discard,
+    "struct thread *", "struct proc *", "int");
 
 /* Forward declarations of file-local helpers. */
 static int	coredump(struct thread *);
 static int	killpg1(struct thread *td, int sig, int pgid, int all,
 		    ksiginfo_t *ksi);
 static int	issignal(struct thread *td);
 static int	sigprop(int sig);
 static void	tdsigwakeup(struct thread *, int, sig_t, int);
 static void	sig_suspend_threads(struct thread *, struct proc *, int);
 static int	filt_sigattach(struct knote *kn);
 static void	filt_sigdetach(struct knote *kn);
 static int	filt_signal(struct knote *kn, long hint);
 static struct thread *sigtd(struct proc *p, int sig, int prop);
 static void	sigqueue_start(void);
 
 /* UMA zone for ksiginfo_t; NULL until sigqueue_start() has created it. */
 static uma_zone_t	ksiginfo_zone = NULL;
 /* Filter operations for signal knotes; f_isfd is 0. */
 struct filterops sig_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_sigattach,
 	.f_detach = filt_sigdetach,
 	.f_event = filt_signal,
 };
 
 /*
  * Tunables and counters exported under the kern and kern.sigqueue sysctl
  * trees; the description string of each SYSCTL_INT states its purpose.
  */
 static int	kern_logsigexit = 1;
 SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW,
     &kern_logsigexit, 0,
     "Log processes quitting on abnormal signals to syslog(3)");
 
 static int	kern_forcesigexit = 1;
 SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW,
     &kern_forcesigexit, 0, "Force trap signal to be handled");
 
 static SYSCTL_NODE(_kern, OID_AUTO, sigqueue, CTLFLAG_RW, 0,
     "POSIX real time signal");
 
 /* Per-process cap checked by sigqueue_add() before queueing a copy. */
 static int	max_pending_per_proc = 128;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, max_pending_per_proc, CTLFLAG_RW,
     &max_pending_per_proc, 0, "Max pending signals per proc");
 
 /* Passed to uma_prealloc() for ksiginfo_zone in sigqueue_start(). */
 static int	preallocate_siginfo = 1024;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, preallocate, CTLFLAG_RDTUN,
     &preallocate_siginfo, 0, "Preallocated signal memory size");
 
 /* Incremented by sigqueue_add() when the per-process cap is hit. */
 static int	signal_overflow = 0;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, overflow, CTLFLAG_RD,
     &signal_overflow, 0, "Number of signals overflew");
 
 /* Incremented by sigqueue_add() when ksiginfo_alloc() returns NULL. */
 static int	signal_alloc_fail = 0;
 SYSCTL_INT(_kern_sigqueue, OID_AUTO, alloc_fail, CTLFLAG_RD,
     &signal_alloc_fail, 0, "signals failed to be allocated");
 
 SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
 
 /*
  * Policy -- Can ucred cr1 send SIGIO to process cr2?
  * Should use cr_cansignal() once cr_cansignal() allows SIGIO and SIGURG
  * in the right situations.
  *
  * True when cr1's effective uid is 0, or when either of cr1's real or
  * effective uids equals either of cr2's.  Both arguments are evaluated
  * several times, so they must not have side effects.
  */
 #define CANSIGIO(cr1, cr2) \
 	((cr1)->cr_uid == 0 || \
 	    (cr1)->cr_ruid == (cr2)->cr_ruid || \
 	    (cr1)->cr_uid == (cr2)->cr_ruid || \
 	    (cr1)->cr_ruid == (cr2)->cr_uid || \
 	    (cr1)->cr_uid == (cr2)->cr_uid)
 
 /*
  * Core dump policy knobs exported under the kern sysctl tree; the
  * description string of each SYSCTL_INT states its purpose.  Only
  * do_coredump defaults to 1; the others start out as 0.
  */
 static int	sugid_coredump;
 SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN,
     &sugid_coredump, 0, "Allow setuid and setgid processes to dump core");
 
 static int	capmode_coredump;
 SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN,
     &capmode_coredump, 0, "Allow processes in capability mode to dump core");
 
 static int	do_coredump = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW,
 	&do_coredump, 0, "Enable/Disable coredumps");
 
 static int	set_core_nodump_flag = 0;
 SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
 	0, "Enable setting the NODUMP flag on coredump files");
 
 static int	coredump_devctl = 0;
 SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl,
 	0, "Generate a devctl notification when processes coredump");
 
 /*
  * Signal properties and actions.
  * The array below categorizes the signals and their default actions
  * according to the following properties:
  */
 #define	SA_KILL		0x01		/* terminates process by default */
 #define	SA_CORE		0x02		/* ditto and coredumps */
 #define	SA_STOP		0x04		/* suspend process */
 #define	SA_TTYSTOP	0x08		/* ditto, from tty */
 #define	SA_IGNORE	0x10		/* ignore by default */
 #define	SA_CONT		0x20		/* continue if suspended */
 #define	SA_CANTMASK	0x40		/* non-maskable, catchable */
 
 /*
  * One entry per signal, listed from SIGHUP through SIGUSR2 (31 entries);
  * each entry is a bitwise OR of the SA_* flags above.
  * NOTE(review): presumably indexed by signal number minus one via
  * sigprop(); confirm against that function.
  */
 static int sigproptbl[NSIG] = {
 	SA_KILL,			/* SIGHUP */
 	SA_KILL,			/* SIGINT */
 	SA_KILL|SA_CORE,		/* SIGQUIT */
 	SA_KILL|SA_CORE,		/* SIGILL */
 	SA_KILL|SA_CORE,		/* SIGTRAP */
 	SA_KILL|SA_CORE,		/* SIGABRT */
 	SA_KILL|SA_CORE,		/* SIGEMT */
 	SA_KILL|SA_CORE,		/* SIGFPE */
 	SA_KILL,			/* SIGKILL */
 	SA_KILL|SA_CORE,		/* SIGBUS */
 	SA_KILL|SA_CORE,		/* SIGSEGV */
 	SA_KILL|SA_CORE,		/* SIGSYS */
 	SA_KILL,			/* SIGPIPE */
 	SA_KILL,			/* SIGALRM */
 	SA_KILL,			/* SIGTERM */
 	SA_IGNORE,			/* SIGURG */
 	SA_STOP,			/* SIGSTOP */
 	SA_STOP|SA_TTYSTOP,		/* SIGTSTP */
 	SA_IGNORE|SA_CONT,		/* SIGCONT */
 	SA_IGNORE,			/* SIGCHLD */
 	SA_STOP|SA_TTYSTOP,		/* SIGTTIN */
 	SA_STOP|SA_TTYSTOP,		/* SIGTTOU */
 	SA_IGNORE,			/* SIGIO */
 	SA_KILL,			/* SIGXCPU */
 	SA_KILL,			/* SIGXFSZ */
 	SA_KILL,			/* SIGVTALRM */
 	SA_KILL,			/* SIGPROF */
 	SA_IGNORE,			/* SIGWINCH  */
 	SA_IGNORE,			/* SIGINFO */
 	SA_KILL,			/* SIGUSR1 */
 	SA_KILL,			/* SIGUSR2 */
 };
 
 /* Forward declaration of a file-local helper. */
 static void reschedule_signals(struct proc *p, sigset_t block, int flags);
 
 /*
  * SYSINIT hook: create the ksiginfo UMA zone, preallocate
  * preallocate_siginfo items in it, and publish the realtime-signal
  * configuration values through p31b_setcfg().
  */
 static void
 sigqueue_start(void)
 {
 	ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_prealloc(ksiginfo_zone, preallocate_siginfo);
 	p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS);
 	p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1);
 	p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
 }
 
 /*
  * Allocate a zeroed ksiginfo_t from ksiginfo_zone.  M_NOWAIT is added to
  * the allocation flags when 'wait' is zero.  Returns NULL if the zone has
  * not been created yet; otherwise returns whatever uma_zalloc() returns.
  */
 ksiginfo_t *
 ksiginfo_alloc(int wait)
 {
 	int flags;
 
 	if (ksiginfo_zone == NULL)
 		return (NULL);
 
 	flags = wait ? M_ZERO : (M_ZERO | M_NOWAIT);
 	return ((ksiginfo_t *)uma_zalloc(ksiginfo_zone, flags));
 }
 
 /*
  * Return 'ksi' to ksiginfo_zone unconditionally; unlike
  * ksiginfo_tryfree(), the KSI_EXT flag is not examined.
  */
 void
 ksiginfo_free(ksiginfo_t *ksi)
 {
 	uma_zfree(ksiginfo_zone, ksi);
 }
 
 /*
  * Free 'ksi' to ksiginfo_zone unless it carries the KSI_EXT flag.
  * Returns 1 if the item was freed and 0 if it was left alone.
  */
 static __inline int
 ksiginfo_tryfree(ksiginfo_t *ksi)
 {
 	if ((ksi->ksi_flags & KSI_EXT) != 0)
 		return (0);
 
 	uma_zfree(ksiginfo_zone, ksi);
 	return (1);
 }
 
 /*
  * Initialize the signal queue 'list': empty both signal sets and the
  * ksiginfo list, record 'p' as the owning process, and set sq_flags to
  * SQ_INIT, the flag the other sigqueue_*() routines assert on.
  */
 void
 sigqueue_init(sigqueue_t *list, struct proc *p)
 {
 	SIGEMPTYSET(list->sq_signals);
 	SIGEMPTYSET(list->sq_kill);
 	TAILQ_INIT(&list->sq_list);
 	list->sq_proc = p;
 	list->sq_flags = SQ_INIT;
 }
 
 /*
  * Get a signal's ksiginfo.
  * Return:
  *	0	-	signal not found
  *	others	-	signal number
  *
  * One pending instance of 'signo' is consumed from 'sq'.  If the signal
  * is recorded in sq_kill, that record is the one consumed and 'si'
  * receives nothing but the signal number.  Otherwise the first queued
  * ksiginfo for 'signo' is unlinked and copied into 'si'.  The sq_signals
  * bit is cleared only when no further instance remains.
  */
 static int
 sigqueue_get(sigqueue_t *sq, int signo, ksiginfo_t *si)
 {
 	struct proc *p = sq->sq_proc;
 	struct ksiginfo *ksi, *next;
 	/* Number of pending instances seen so far; the scan stops at 2. */
 	int count = 0;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (!SIGISMEMBER(sq->sq_signals, signo))
 		return (0);
 
 	if (SIGISMEMBER(sq->sq_kill, signo)) {
 		count++;
 		SIGDELSET(sq->sq_kill, signo);
 	}
 
 	TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) {
 		if (ksi->ksi_signo == signo) {
 			/* Dequeue only if nothing has been consumed yet. */
 			if (count == 0) {
 				TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link);
 				ksi->ksi_sigq = NULL;
 				ksiginfo_copy(ksi, si);
 				if (ksiginfo_tryfree(ksi) && p != NULL)
 					p->p_pendingcnt--;
 			}
 			/* A second instance means the signal stays pending. */
 			if (++count > 1)
 				break;
 		}
 	}
 
 	if (count <= 1)
 		SIGDELSET(sq->sq_signals, signo);
 	si->ksi_signo = signo;
 	return (signo);
 }
 
 /*
  * Unlink 'ksi' from the signal queue it is on, if any.  The owning
  * process's pending count is decremented unless the item is flagged
  * KSI_EXT.  The signal is dropped from sq_signals when no other queued
  * item and no sq_kill record for the same signal number remains.
  * 'ksi' itself is not freed.
  */
 void
 sigqueue_take(ksiginfo_t *ksi)
 {
 	struct ksiginfo *other;
 	struct proc *owner;
 	sigqueue_t *queue;
 
 	if (ksi == NULL)
 		return;
 	queue = ksi->ksi_sigq;
 	if (queue == NULL)
 		return;
 
 	owner = queue->sq_proc;
 	TAILQ_REMOVE(&queue->sq_list, ksi, ksi_link);
 	ksi->ksi_sigq = NULL;
 	if ((ksi->ksi_flags & KSI_EXT) == 0 && owner != NULL)
 		owner->p_pendingcnt--;
 
 	/* Look for another queued instance of the same signal. */
 	TAILQ_FOREACH(other, &queue->sq_list, ksi_link) {
 		if (other->ksi_signo == ksi->ksi_signo)
 			break;
 	}
 	if (other == NULL && !SIGISMEMBER(queue->sq_kill, ksi->ksi_signo))
 		SIGDELSET(queue->sq_signals, ksi->ksi_signo);
 }
 
 /*
  * Record signal 'signo' as pending on 'sq', queueing the information in
  * 'si' when possible.
  *
  * - SIGKILL, SIGSTOP, or a NULL 'si': only a bit in sq_kill is recorded.
  * - 'si' flagged KSI_INS: 'si' itself is linked into the queue.
  * - Otherwise a copy of 'si' is allocated and queued, subject to the
  *   per-process max_pending_per_proc cap.
  *
  * Returns 0 on success.  Returns EAGAIN, without marking the signal
  * pending, when the copy could not be queued and 'si' has KSI_SIGQ set
  * but not KSI_TRAP.  In every other failure case the signal is recorded
  * in sq_kill instead and 0 is returned.
  */
 static int
 sigqueue_add(sigqueue_t *sq, int signo, ksiginfo_t *si)
 {
 	struct proc *p = sq->sq_proc;
 	struct ksiginfo *ksi;
 	int ret = 0;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (signo == SIGKILL || signo == SIGSTOP || si == NULL) {
 		SIGADDSET(sq->sq_kill, signo);
 		goto out_set_bit;
 	}
 
 	/* directly insert the ksi, don't copy it */
 	if (si->ksi_flags & KSI_INS) {
 		if (si->ksi_flags & KSI_HEAD)
 			TAILQ_INSERT_HEAD(&sq->sq_list, si, ksi_link);
 		else
 			TAILQ_INSERT_TAIL(&sq->sq_list, si, ksi_link);
 		si->ksi_sigq = sq;
 		goto out_set_bit;
 	}
 
 	/* No zone to allocate from yet: fall back to the sq_kill bit. */
 	if (__predict_false(ksiginfo_zone == NULL)) {
 		SIGADDSET(sq->sq_kill, signo);
 		goto out_set_bit;
 	}
 
 	if (p != NULL && p->p_pendingcnt >= max_pending_per_proc) {
 		signal_overflow++;
 		ret = EAGAIN;
 	} else if ((ksi = ksiginfo_alloc(0)) == NULL) {
 		signal_alloc_fail++;
 		ret = EAGAIN;
 	} else {
 		if (p != NULL)
 			p->p_pendingcnt++;
 		ksiginfo_copy(si, ksi);
 		ksi->ksi_signo = signo;
 		if (si->ksi_flags & KSI_HEAD)
 			TAILQ_INSERT_HEAD(&sq->sq_list, ksi, ksi_link);
 		else
 			TAILQ_INSERT_TAIL(&sq->sq_list, ksi, ksi_link);
 		ksi->ksi_sigq = sq;
 	}
 
 	/*
 	 * For KSI_TRAP items, and for items without KSI_SIGQ, a queueing
 	 * failure is not reported: the signal is recorded in sq_kill
 	 * instead and the call succeeds.
 	 */
 	if ((si->ksi_flags & KSI_TRAP) != 0 ||
 	    (si->ksi_flags & KSI_SIGQ) == 0) {
 		if (ret != 0)
 			SIGADDSET(sq->sq_kill, signo);
 		ret = 0;
 		goto out_set_bit;
 	}
 
 	/* Failure is reported; the sq_signals bit is left untouched. */
 	if (ret != 0)
 		return (ret);
 
 out_set_bit:
 	SIGADDSET(sq->sq_signals, signo);
 	return (ret);
 }
 
 /*
  * Discard every queued ksiginfo on sq and clear all of its pending
  * bits.  When sq belongs to a process, that process must be locked.
  */
 void
 sigqueue_flush(sigqueue_t *sq)
 {
 	struct proc *p = sq->sq_proc;
 	ksiginfo_t *cur, *tmp;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited"));
 
 	if (p != NULL)
 		PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	TAILQ_FOREACH_SAFE(cur, &sq->sq_list, ksi_link, tmp) {
 		TAILQ_REMOVE(&sq->sq_list, cur, ksi_link);
 		cur->ksi_sigq = NULL;
 		if (ksiginfo_tryfree(cur) && p != NULL)
 			p->p_pendingcnt--;
 	}
 
 	SIGEMPTYSET(sq->sq_kill);
 	SIGEMPTYSET(sq->sq_signals);
 }
 
 /*
  * Transfer every pending signal that is a member of *set, together with
  * its queued ksiginfo entries, from queue src to queue dst.
  */
 static void
 sigqueue_move_set(sigqueue_t *src, sigqueue_t *dst, const sigset_t *set)
 {
 	struct proc *from, *to;
 	ksiginfo_t *cur, *tmp;
 	sigset_t moved;
 
 	KASSERT(src->sq_flags & SQ_INIT, ("src sigqueue not inited"));
 	KASSERT(dst->sq_flags & SQ_INIT, ("dst sigqueue not inited"));
 	from = src->sq_proc;
 	to = dst->sq_proc;
 
 	/* Relink matching entries, keeping both pending counts in step. */
 	TAILQ_FOREACH_SAFE(cur, &src->sq_list, ksi_link, tmp) {
 		if (!SIGISMEMBER(*set, cur->ksi_signo))
 			continue;
 		TAILQ_REMOVE(&src->sq_list, cur, ksi_link);
 		if (from != NULL)
 			from->p_pendingcnt--;
 		TAILQ_INSERT_TAIL(&dst->sq_list, cur, ksi_link);
 		cur->ksi_sigq = dst;
 		if (to != NULL)
 			to->p_pendingcnt++;
 	}
 
 	/* Carry over the sq_kill bits selected by *set. */
 	moved = src->sq_kill;
 	SIGSETAND(moved, *set);
 	SIGSETOR(dst->sq_kill, moved);
 	SIGSETNAND(src->sq_kill, moved);
 
 	/* Carry over the pending bits selected by *set. */
 	moved = src->sq_signals;
 	SIGSETAND(moved, *set);
 	SIGSETOR(dst->sq_signals, moved);
 	SIGSETNAND(src->sq_signals, moved);
 }
 
 #if 0
 /*
  * Single-signal convenience wrapper around sigqueue_move_set(): move
  * pending signal signo from src to dst.  Currently compiled out.
  */
 static void
 sigqueue_move(sigqueue_t *src, sigqueue_t *dst, int signo)
 {
 	sigset_t set;
 
 	SIGEMPTYSET(set);
 	SIGADDSET(set, signo);
 	sigqueue_move_set(src, dst, &set);
 }
 #endif
 
 /*
  * Drop every pending signal that is a member of *set from sq, releasing
  * the associated queued ksiginfo entries.
  */
 static void
 sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set)
 {
 	struct proc *p = sq->sq_proc;
 	ksiginfo_t *cur, *tmp;
 
 	KASSERT(sq->sq_flags & SQ_INIT, ("src sigqueue not inited"));
 
 	/* Unlink and release the matching queue entries. */
 	TAILQ_FOREACH_SAFE(cur, &sq->sq_list, ksi_link, tmp) {
 		if (!SIGISMEMBER(*set, cur->ksi_signo))
 			continue;
 		TAILQ_REMOVE(&sq->sq_list, cur, ksi_link);
 		cur->ksi_sigq = NULL;
 		if (ksiginfo_tryfree(cur) && p != NULL)
 			p->p_pendingcnt--;
 	}
 
 	/* Then clear the corresponding bits in both masks. */
 	SIGSETNAND(sq->sq_kill, *set);
 	SIGSETNAND(sq->sq_signals, *set);
 }
 
 /*
  * Drop pending signal signo from sq.
  */
 void
 sigqueue_delete(sigqueue_t *sq, int signo)
 {
 	sigset_t single;
 
 	/* Build a one-element set and reuse the set-based helper. */
 	SIGEMPTYSET(single);
 	SIGADDSET(single, signo);
 	sigqueue_delete_set(sq, &single);
 }
 
 /* Remove a set of signals for a process */
 static void
 sigqueue_delete_set_proc(struct proc *p, const sigset_t *set)
 {
 	sigqueue_t worklist;
 	struct thread *td0;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sigqueue_init(&worklist, NULL);
 	sigqueue_move_set(&p->p_sigqueue, &worklist, set);
 
 	FOREACH_THREAD_IN_PROC(p, td0)
 		sigqueue_move_set(&td0->td_sigqueue, &worklist, set);
 
 	sigqueue_flush(&worklist);
 }
 
 /*
  * Remove pending signal signo from process p and all of its threads.
  */
 void
 sigqueue_delete_proc(struct proc *p, int signo)
 {
 	sigset_t single;
 
 	SIGEMPTYSET(single);
 	SIGADDSET(single, signo);
 	sigqueue_delete_set_proc(p, &single);
 }
 
 /*
  * Remove the pending stop signals (SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU)
  * from process p and all of its threads.
  */
 static void
 sigqueue_delete_stopmask_proc(struct proc *p)
 {
 	sigset_t stopmask;
 
 	SIGEMPTYSET(stopmask);
 	SIGADDSET(stopmask, SIGTTOU);
 	SIGADDSET(stopmask, SIGTTIN);
 	SIGADDSET(stopmask, SIGTSTP);
 	SIGADDSET(stopmask, SIGSTOP);
 	sigqueue_delete_set_proc(p, &stopmask);
 }
 
 /*
  * Return the signal that should be delivered to the current thread td,
  * or 0 when there is none.  A pending stop signal with the default
  * action makes the process stop inside issignal().
  *
  * The process lock and ps_mtx must be held; the thread lock must not.
  */
 int
 cursig(struct thread *td)
 {
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 	mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_NOTOWNED);
 
 	if (SIGPENDING(td))
 		return (issignal(td));
 	return (0);
 }
 
 /*
  * Flag thread td so that ast() processes its unmasked pending signals on
  * the way back to user mode.  Call this whenever a signal is added to
  * td_sigqueue or unmasked in td_sigmask.  The process must be locked.
  */
 void
 signotify(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (!SIGPENDING(td))
 		return;
 
 	thread_lock(td);
 	td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
 	thread_unlock(td);
 }
 
 /*
  * Report whether stack pointer sp lies on the current thread's alternate
  * signal stack.  Returns 0 when no alternate stack is enabled.
  */
 int
 sigonstack(size_t sp)
 {
 	struct thread *td = curthread;
 
 	if ((td->td_pflags & TDP_ALTSTACK) == 0)
 		return (0);
 #if defined(COMPAT_43)
 	/* With a zero-sized stack, rely on the recorded SS_ONSTACK flag. */
 	if (td->td_sigstk.ss_size == 0)
 		return (td->td_sigstk.ss_flags & SS_ONSTACK);
 #endif
 	/* Unsigned subtraction: also false when sp is below ss_sp. */
 	return ((sp - (size_t)td->td_sigstk.ss_sp) < td->td_sigstk.ss_size);
 }
 
 /*
  * Look up the property flags of signal sig in sigproptbl.  Signal numbers
  * outside the range 1..NSIG-1 have no properties and yield 0.
  */
 static __inline int
 sigprop(int sig)
 {
 
 	if (sig <= 0 || sig >= NSIG)
 		return (0);
 	return (sigproptbl[_SIG_IDX(sig)]);
 }
 
 /*
  * Return the lowest-numbered signal present in *set, or 0 when the set
  * is empty.
  */
 int
 sig_ffs(sigset_t *set)
 {
 	int word;
 
 	for (word = 0; word < _SIG_WORDS; word++) {
 		if (set->__bits[word] == 0)
 			continue;
 		/* Each word of the set accounts for 32 signal numbers. */
 		return (word * 32 + ffs(set->__bits[word]));
 	}
 	return (0);
 }
 
 static bool
 sigact_flag_test(const struct sigaction *act, int flag)
 {
 
 	/*
 	 * SA_SIGINFO is reset when signal disposition is set to
 	 * ignore or default.  Other flags are kept according to user
 	 * settings.
 	 */
 	return ((act->sa_flags & flag) != 0 && (flag != SA_SIGINFO ||
 	    ((__sighandler_t *)act->sa_sigaction != SIG_IGN &&
 	    (__sighandler_t *)act->sa_sigaction != SIG_DFL)));
 }
 
 /*
  * kern_sigaction
  * sigaction
  * freebsd4_sigaction
  * osigaction
  *
  * Common implementation behind the sigaction-style entry points listed
  * above: optionally report the current disposition of signal sig in
  * *oact and/or install the new disposition *act for the calling process.
  *
  * flags carries KSA_* bits identifying the compatibility entry point
  * (KSA_FREEBSD4, KSA_OSIGSET) that made the call.
  *
  * Returns 0 on success, EINVAL for an invalid signal number, for
  * unsupported sa_flags combined with a real handler, or for an attempt
  * to change the disposition of SIGKILL or SIGSTOP.
  */
 int
 kern_sigaction(struct thread *td, int sig, const struct sigaction *act,
     struct sigaction *oact, int flags)
 {
 	struct sigacts *ps;
 	struct proc *p = td->td_proc;
 
 	if (!_SIG_VALID(sig))
 		return (EINVAL);
 	/* Unknown flag bits are rejected only when a handler is installed. */
 	if (act != NULL && act->sa_handler != SIG_DFL &&
 	    act->sa_handler != SIG_IGN && (act->sa_flags & ~(SA_ONSTACK |
 	    SA_RESTART | SA_RESETHAND | SA_NOCLDSTOP | SA_NODEFER |
 	    SA_NOCLDWAIT | SA_SIGINFO)) != 0)
 		return (EINVAL);
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	if (oact) {
 		/* Rebuild the user-visible sigaction from the per-signal sets. */
 		oact->sa_mask = ps->ps_catchmask[_SIG_IDX(sig)];
 		oact->sa_flags = 0;
 		if (SIGISMEMBER(ps->ps_sigonstack, sig))
 			oact->sa_flags |= SA_ONSTACK;
 		/* ps_sigintr holds the inverse of SA_RESTART. */
 		if (!SIGISMEMBER(ps->ps_sigintr, sig))
 			oact->sa_flags |= SA_RESTART;
 		if (SIGISMEMBER(ps->ps_sigreset, sig))
 			oact->sa_flags |= SA_RESETHAND;
 		if (SIGISMEMBER(ps->ps_signodefer, sig))
 			oact->sa_flags |= SA_NODEFER;
 		if (SIGISMEMBER(ps->ps_siginfo, sig)) {
 			oact->sa_flags |= SA_SIGINFO;
 			oact->sa_sigaction =
 			    (__siginfohandler_t *)ps->ps_sigact[_SIG_IDX(sig)];
 		} else
 			oact->sa_handler = ps->ps_sigact[_SIG_IDX(sig)];
 		if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDSTOP)
 			oact->sa_flags |= SA_NOCLDSTOP;
 		if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDWAIT)
 			oact->sa_flags |= SA_NOCLDWAIT;
 	}
 	if (act) {
 		/* SIGKILL and SIGSTOP may only keep the default action. */
 		if ((sig == SIGKILL || sig == SIGSTOP) &&
 		    act->sa_handler != SIG_DFL) {
 			mtx_unlock(&ps->ps_mtx);
 			PROC_UNLOCK(p);
 			return (EINVAL);
 		}
 
 		/*
 		 * Change setting atomically.
 		 */
 
 		ps->ps_catchmask[_SIG_IDX(sig)] = act->sa_mask;
 		SIG_CANTMASK(ps->ps_catchmask[_SIG_IDX(sig)]);
 		if (sigact_flag_test(act, SA_SIGINFO)) {
 			ps->ps_sigact[_SIG_IDX(sig)] =
 			    (__sighandler_t *)act->sa_sigaction;
 			SIGADDSET(ps->ps_siginfo, sig);
 		} else {
 			ps->ps_sigact[_SIG_IDX(sig)] = act->sa_handler;
 			SIGDELSET(ps->ps_siginfo, sig);
 		}
 		/* Mirror each remaining flag into its per-signal set. */
 		if (!sigact_flag_test(act, SA_RESTART))
 			SIGADDSET(ps->ps_sigintr, sig);
 		else
 			SIGDELSET(ps->ps_sigintr, sig);
 		if (sigact_flag_test(act, SA_ONSTACK))
 			SIGADDSET(ps->ps_sigonstack, sig);
 		else
 			SIGDELSET(ps->ps_sigonstack, sig);
 		if (sigact_flag_test(act, SA_RESETHAND))
 			SIGADDSET(ps->ps_sigreset, sig);
 		else
 			SIGDELSET(ps->ps_sigreset, sig);
 		if (sigact_flag_test(act, SA_NODEFER))
 			SIGADDSET(ps->ps_signodefer, sig);
 		else
 			SIGDELSET(ps->ps_signodefer, sig);
 		/* SIGCHLD additionally controls the process-wide PS_* flags. */
 		if (sig == SIGCHLD) {
 			if (act->sa_flags & SA_NOCLDSTOP)
 				ps->ps_flag |= PS_NOCLDSTOP;
 			else
 				ps->ps_flag &= ~PS_NOCLDSTOP;
 			if (act->sa_flags & SA_NOCLDWAIT) {
 				/*
 				 * Paranoia: since SA_NOCLDWAIT is implemented
 				 * by reparenting the dying child to PID 1 (and
 				 * trust it to reap the zombie), PID 1 itself
 				 * is forbidden to set SA_NOCLDWAIT.
 				 */
 				if (p->p_pid == 1)
 					ps->ps_flag &= ~PS_NOCLDWAIT;
 				else
 					ps->ps_flag |= PS_NOCLDWAIT;
 			} else
 				ps->ps_flag &= ~PS_NOCLDWAIT;
 			if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN)
 				ps->ps_flag |= PS_CLDSIGIGN;
 			else
 				ps->ps_flag &= ~PS_CLDSIGIGN;
 		}
 		/*
 		 * Set bit in ps_sigignore for signals that are set to SIG_IGN,
 		 * and for signals set to SIG_DFL where the default is to
 		 * ignore. However, don't put SIGCONT in ps_sigignore, as we
 		 * have to restart the process.
 		 */
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    (sigprop(sig) & SA_IGNORE &&
 		     ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) {
 			/* never to be seen again */
 			sigqueue_delete_proc(p, sig);
 			if (sig != SIGCONT)
 				/* easier in psignal */
 				SIGADDSET(ps->ps_sigignore, sig);
 			SIGDELSET(ps->ps_sigcatch, sig);
 		} else {
 			SIGDELSET(ps->ps_sigignore, sig);
 			if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)
 				SIGDELSET(ps->ps_sigcatch, sig);
 			else
 				SIGADDSET(ps->ps_sigcatch, sig);
 		}
 		/*
 		 * Remember which compatibility interface installed a real
 		 * handler; SIG_IGN and SIG_DFL clear the marker.
 		 */
 #ifdef COMPAT_FREEBSD4
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL ||
 		    (flags & KSA_FREEBSD4) == 0)
 			SIGDELSET(ps->ps_freebsd4, sig);
 		else
 			SIGADDSET(ps->ps_freebsd4, sig);
 #endif
 #ifdef COMPAT_43
 		if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN ||
 		    ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL ||
 		    (flags & KSA_OSIGSET) == 0)
 			SIGDELSET(ps->ps_osigset, sig);
 		else
 			SIGADDSET(ps->ps_osigset, sig);
 #endif
 	}
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigaction_args {
 	int	sig;
 	struct	sigaction *act;
 	struct	sigaction *oact;
 };
 #endif
 /*
  * sigaction system call: copy the new action in from user space (when
  * uap->act is non-NULL), apply it through kern_sigaction(), and copy the
  * previous action back out (when uap->oact is non-NULL).
  *
  * Returns 0 on success or an errno value from copyin(), kern_sigaction()
  * or copyout().
  */
 int
 sys_sigaction(struct thread *td, struct sigaction_args *uap)
 {
 	struct sigaction act, oact;
 	struct sigaction *actp, *oactp;
 	int error;
 
 	/* A NULL user pointer means "do not set" / "do not report". */
 	actp = (uap->act != NULL) ? &act : NULL;
 	oactp = (uap->oact != NULL) ? &oact : NULL;
 	if (actp) {
 		error = copyin(uap->act, actp, sizeof(act));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaction(td, uap->sig, actp, oactp, 0);
 	if (oactp && !error)
 		error = copyout(oactp, uap->oact, sizeof(oact));
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD4
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_sigaction_args {
 	int	sig;
 	struct	sigaction *act;
 	struct	sigaction *oact;
 };
 #endif
 /*
  * FreeBSD 4 compatible sigaction system call.  Identical to
  * sys_sigaction() except that KSA_FREEBSD4 is passed to kern_sigaction().
  *
  * Returns 0 on success or an errno value from copyin(), kern_sigaction()
  * or copyout().
  */
 int
 freebsd4_sigaction(struct thread *td, struct freebsd4_sigaction_args *uap)
 {
 	struct sigaction act, oact;
 	struct sigaction *actp, *oactp;
 	int error;
 
 	/* A NULL user pointer means "do not set" / "do not report". */
 	actp = (uap->act != NULL) ? &act : NULL;
 	oactp = (uap->oact != NULL) ? &oact : NULL;
 	if (actp) {
 		error = copyin(uap->act, actp, sizeof(act));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaction(td, uap->sig, actp, oactp, KSA_FREEBSD4);
 	if (oactp && !error)
 		error = copyout(oactp, uap->oact, sizeof(oact));
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD4 */
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigaction_args {
 	int	signum;
 	struct	osigaction *nsa;
 	struct	osigaction *osa;
 };
 #endif
 /*
  * Old-style sigaction system call operating on struct osigaction.
  * Converts between the old and current structures around a call to
  * kern_sigaction() with KSA_OSIGSET.
  *
  * Returns EINVAL for signal numbers outside 1..ONSIG-1, otherwise 0 or
  * an errno value from copyin(), kern_sigaction() or copyout().
  */
 int
 osigaction(struct thread *td, struct osigaction_args *uap)
 {
 	struct osigaction sa;
 	struct sigaction nsa, osa;
 	struct sigaction *nsap, *osap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 
 	nsap = (uap->nsa != NULL) ? &nsa : NULL;
 	osap = (uap->osa != NULL) ? &osa : NULL;
 
 	if (nsap) {
 		error = copyin(uap->nsa, &sa, sizeof(sa));
 		if (error)
 			return (error);
 		/* Widen the old structure into a struct sigaction. */
 		nsap->sa_handler = sa.sa_handler;
 		nsap->sa_flags = sa.sa_flags;
 		OSIG2SIG(sa.sa_mask, nsap->sa_mask);
 	}
 	error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET);
 	if (osap && !error) {
 		/* Narrow the result back into the old structure. */
 		sa.sa_handler = osap->sa_handler;
 		sa.sa_flags = osap->sa_flags;
 		SIG2OSIG(osap->sa_mask, sa.sa_mask);
 		error = copyout(&sa, uap->osa, sizeof(sa));
 	}
 	return (error);
 }
 
 #if !defined(__i386__)
 /*
  * Avoid replicating the same stub everywhere: when __i386__ is not
  * defined, osigreturn simply forwards to nosys() and returns its result.
  */
 int
 osigreturn(struct thread *td, struct osigreturn_args *uap)
 {
 
 	return (nosys(td, (struct nosys_args *)uap));
 }
 #endif
 #endif /* COMPAT_43 */
 
 /*
  * Initialize signal state for process 0;
  * set to ignore signals that are ignored by default.
  * SIGCONT is deliberately left out of ps_sigignore.
  */
 void
 siginit(struct proc *p)
 {
 	struct sigacts *ps;
 	int i;
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	/* sigprop() yields 0 for i == NSIG, so that iteration is a no-op. */
 	for (i = 1; i <= NSIG; i++) {
 		if (sigprop(i) & SA_IGNORE && i != SIGCONT) {
 			SIGADDSET(ps->ps_sigignore, i);
 		}
 	}
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 }
 
 /*
  * Put signal sig back to its default disposition in ps.  The caller
  * must hold ps_mtx.
  */
 static void
 sigdflt(struct sigacts *ps, int sig)
 {
 
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 
 	ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 	SIGDELSET(ps->ps_siginfo, sig);
 	SIGDELSET(ps->ps_sigcatch, sig);
 	/* Default-ignored signals, except SIGCONT, become ignored again. */
 	if (sig != SIGCONT && (sigprop(sig) & SA_IGNORE) != 0)
 		SIGADDSET(ps->ps_sigignore, sig);
 }
 
 /*
  * Reset signals for an exec of the specified process.
  *
  * Caught signals revert to their default disposition, the alternate
  * signal stack of the first thread is disabled and the SIGCHLD-related
  * process flags are cleared.  The process must be locked by the caller;
  * ps_mtx is taken and released here.
  */
 void
 execsigs(struct proc *p)
 {
 	sigset_t osigignore;
 	struct sigacts *ps;
 	int sig;
 	struct thread *td;
 
 	/*
 	 * Reset caught signals.  Held signals remain held
 	 * through td_sigmask (unless they were caught,
 	 * and are now ignored by default).
 	 */
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	td = FIRST_THREAD_IN_PROC(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	/* sigdflt() removes sig from ps_sigcatch, so this loop terminates. */
 	while (SIGNOTEMPTY(ps->ps_sigcatch)) {
 		sig = sig_ffs(&ps->ps_sigcatch);
 		sigdflt(ps, sig);
 		/* Pending instances of a now-ignored signal are discarded. */
 		if ((sigprop(sig) & SA_IGNORE) != 0)
 			sigqueue_delete_proc(p, sig);
 	}
 
 	/*
 	 * As CloudABI processes cannot modify signal handlers, fully
 	 * reset all signals to their default behavior. Do ignore
 	 * SIGPIPE, as it would otherwise be impossible to recover from
 	 * writes to broken pipes and sockets.
 	 */
 	if (SV_PROC_ABI(p) == SV_ABI_CLOUDABI) {
 		/* Walk a private copy so each signal is visited exactly once. */
 		osigignore = ps->ps_sigignore;
 		while (SIGNOTEMPTY(osigignore)) {
 			sig = sig_ffs(&osigignore);
 			SIGDELSET(osigignore, sig);
 			if (sig != SIGPIPE)
 				sigdflt(ps, sig);
 		}
 		SIGADDSET(ps->ps_sigignore, SIGPIPE);
 	}
 
 	/*
 	 * Reset stack state to the user stack.
 	 * Clear set of signals caught on the signal stack.
 	 */
 	td->td_sigstk.ss_flags = SS_DISABLE;
 	td->td_sigstk.ss_size = 0;
 	td->td_sigstk.ss_sp = 0;
 	td->td_pflags &= ~TDP_ALTSTACK;
 	/*
 	 * Reset no zombies if child dies flag as Solaris does.
 	 */
 	ps->ps_flag &= ~(PS_NOCLDWAIT | PS_CLDSIGIGN);
 	if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN)
 		ps->ps_sigact[_SIG_IDX(SIGCHLD)] = SIG_DFL;
 	mtx_unlock(&ps->ps_mtx);
 }
 
 /*
  * kern_sigprocmask()
  *
  *	Manipulate signal mask.
  *
  *	how is SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK and is applied to the
  *	signal mask of td using *set; a NULL set leaves the mask alone.
  *	When oset is non-NULL the previous mask is stored in *oset.
  *	Note that *set itself is modified by SIG_CANTMASK() for SIG_BLOCK
  *	and SIG_SETMASK.
  *
  *	flags: SIGPROCMASK_PROC_LOCKED - the caller already holds the
  *	process lock (otherwise it is taken and dropped here);
  *	SIGPROCMASK_PS_LOCKED - the caller holds ps_mtx (only asserted);
  *	SIGPROCMASK_OLD - SIG_SETMASK applies *set through SIGSETLO()
  *	instead of replacing the whole mask.
  *
  *	Returns 0, or EINVAL for an unknown how.
  */
 int
 kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset,
     int flags)
 {
 	sigset_t new_block, oset1;
 	struct proc *p;
 	int error;
 
 	p = td->td_proc;
 	if ((flags & SIGPROCMASK_PROC_LOCKED) != 0)
 		PROC_LOCK_ASSERT(p, MA_OWNED);
 	else
 		PROC_LOCK(p);
 	mtx_assert(&p->p_sigacts->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0
 	    ? MA_OWNED : MA_NOTOWNED);
 	if (oset != NULL)
 		*oset = td->td_sigmask;
 
 	error = 0;
 	if (set != NULL) {
 		switch (how) {
 		case SIG_BLOCK:
 			SIG_CANTMASK(*set);
 			oset1 = td->td_sigmask;
 			SIGSETOR(td->td_sigmask, *set);
 			/* new_block = bits set now that were clear before. */
 			new_block = td->td_sigmask;
 			SIGSETNAND(new_block, oset1);
 			break;
 		case SIG_UNBLOCK:
 			SIGSETNAND(td->td_sigmask, *set);
 			/* Newly unmasked signals may now be deliverable. */
 			signotify(td);
 			/* Nothing was blocked, so no rescheduling is needed. */
 			goto out;
 		case SIG_SETMASK:
 			SIG_CANTMASK(*set);
 			oset1 = td->td_sigmask;
 			if (flags & SIGPROCMASK_OLD)
 				SIGSETLO(td->td_sigmask, *set);
 			else
 				td->td_sigmask = *set;
 			new_block = td->td_sigmask;
 			SIGSETNAND(new_block, oset1);
 			signotify(td);
 			break;
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		/*
 		 * The new_block set contains signals that were not previously
 		 * blocked, but are blocked now.
 		 *
 		 * In case we block any signal that was not previously blocked
 		 * for td, and process has the signal pending, try to schedule
 		 * signal delivery to some thread that does not block the
 		 * signal, possibly waking it up.
 		 */
 		if (p->p_numthreads != 1)
 			reschedule_signals(p, new_block, flags);
 	}
 
 out:
 	/* Drop the process lock only if it was acquired above. */
 	if (!(flags & SIGPROCMASK_PROC_LOCKED))
 		PROC_UNLOCK(p);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigprocmask_args {
 	int	how;
 	const sigset_t *set;
 	sigset_t *oset;
 };
 #endif
 /*
  * sigprocmask system call: copy the requested set in (when uap->set is
  * non-NULL), apply it through kern_sigprocmask() and copy the previous
  * mask out (when uap->oset is non-NULL).
  *
  * Returns 0 on success or an errno value from copyin(),
  * kern_sigprocmask() or copyout().
  */
 int
 sys_sigprocmask(struct thread *td, struct sigprocmask_args *uap)
 {
 	sigset_t set, oset;
 	sigset_t *setp, *osetp;
 	int error;
 
 	/* A NULL user pointer means "do not set" / "do not report". */
 	setp = (uap->set != NULL) ? &set : NULL;
 	osetp = (uap->oset != NULL) ? &oset : NULL;
 	if (setp) {
 		error = copyin(uap->set, setp, sizeof(set));
 		if (error)
 			return (error);
 	}
 	error = kern_sigprocmask(td, uap->how, setp, osetp, 0);
 	if (osetp && !error) {
 		error = copyout(osetp, uap->oset, sizeof(oset));
 	}
 	return (error);
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigprocmask_args {
 	int	how;
 	osigset_t mask;
 };
 #endif
 /*
  * Old-style sigprocmask system call: the mask is passed by value as an
  * osigset_t and the previous mask is returned in td_retval[0] rather
  * than through a user pointer.
  *
  * Returns the result of kern_sigprocmask().
  */
 int
 osigprocmask(struct thread *td, struct osigprocmask_args *uap)
 {
 	sigset_t set, oset;
 	int error;
 
 	OSIG2SIG(uap->mask, set);
 	/* SIGPROCMASK_OLD: SIG_SETMASK goes through SIGSETLO(). */
 	error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD);
 	/* oset is filled in even when kern_sigprocmask() fails. */
 	SIG2OSIG(oset, td->td_retval[0]);
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 int
 sys_sigwait(struct thread *td, struct sigwait_args *uap)
 {
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error) {
 		td->td_retval[0] = error;
 		return (0);
 	}
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error) {
 		if (error == EINTR && td->td_proc->p_osrel < P_OSREL_SIGWAIT)
 			error = ERESTART;
 		if (error == ERESTART)
 			return (error);
 		td->td_retval[0] = error;
 		return (0);
 	}
 
 	error = copyout(&ksi.ksi_signo, uap->sig, sizeof(ksi.ksi_signo));
 	td->td_retval[0] = error;
 	return (0);
 }
 
 int
 sys_sigtimedwait(struct thread *td, struct sigtimedwait_args *uap)
 {
 	struct timespec ts;
 	struct timespec *timeout;
 	sigset_t set;
 	ksiginfo_t ksi;
 	int error;
 
 	if (uap->timeout) {
 		error = copyin(uap->timeout, &ts, sizeof(ts));
 		if (error)
 			return (error);
 
 		timeout = &ts;
 	} else
 		timeout = NULL;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, timeout);
 	if (error)
 		return (error);
 
 	if (uap->info)
 		error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t));
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 int
 sys_sigwaitinfo(struct thread *td, struct sigwaitinfo_args *uap)
 {
 	ksiginfo_t ksi;
 	sigset_t set;
 	int error;
 
 	error = copyin(uap->set, &set, sizeof(set));
 	if (error)
 		return (error);
 
 	error = kern_sigtimedwait(td, set, &ksi, NULL);
 	if (error)
 		return (error);
 
 	if (uap->info)
 		error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t));
 
 	if (error == 0)
 		td->td_retval[0] = ksi.ksi_signo;
 	return (error);
 }
 
 /*
  * Wait for one of the signals in waitset to become pending for td and
  * dequeue it into *ksi.
  *
  * timeout, when non-NULL, bounds the wait; a timeout whose tv_nsec is
  * out of range yields EINVAL, but only after pending signals have been
  * checked once.  Returns 0 with *ksi filled in, EAGAIN when the timeout
  * expires, or the error returned by msleep() (ERESTART is converted to
  * EINTR when a timeout was given).
  */
 int
 kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi,
 	struct timespec *timeout)
 {
 	struct sigacts *ps;
 	sigset_t saved_mask, new_block;
 	struct proc *p;
 	int error, sig, timo, timevalid = 0;
 	struct timespec rts, ets, ts;
 	struct timeval tv;
 
 	p = td->td_proc;
 	error = 0;
 	ets.tv_sec = 0;
 	ets.tv_nsec = 0;
 
 	if (timeout != NULL) {
 		if (timeout->tv_nsec >= 0 && timeout->tv_nsec < 1000000000) {
 			timevalid = 1;
 			/* ets = absolute uptime at which the wait expires. */
 			getnanouptime(&rts);
 			ets = rts;
 			timespecadd(&ets, timeout);
 		}
 	}
 	ksiginfo_init(ksi);
 	/* Some signals can not be waited for. */
 	SIG_CANTMASK(waitset);
 	ps = p->p_sigacts;
 	PROC_LOCK(p);
 	/* Temporarily unblock the awaited signals; restored below. */
 	saved_mask = td->td_sigmask;
 	SIGSETNAND(td->td_sigmask, waitset);
 	for (;;) {
 		mtx_lock(&ps->ps_mtx);
 		sig = cursig(td);
 		mtx_unlock(&ps->ps_mtx);
 		/* Look on the thread queue first, then the process queue. */
 		if (sig != 0 && SIGISMEMBER(waitset, sig)) {
 			if (sigqueue_get(&td->td_sigqueue, sig, ksi) != 0 ||
 			    sigqueue_get(&p->p_sigqueue, sig, ksi) != 0) {
 				error = 0;
 				break;
 			}
 		}
 
 		/* An error left over from the previous msleep() ends the wait. */
 		if (error != 0)
 			break;
 
 		/*
 		 * POSIX says this must be checked after looking for pending
 		 * signals.
 		 */
 		if (timeout != NULL) {
 			if (!timevalid) {
 				error = EINVAL;
 				break;
 			}
 			getnanouptime(&rts);
 			if (timespeccmp(&rts, &ets, >=)) {
 				error = EAGAIN;
 				break;
 			}
 			/* Convert the remaining time into ticks for msleep(). */
 			ts = ets;
 			timespecsub(&ts, &rts);
 			TIMESPEC_TO_TIMEVAL(&tv, &ts);
 			timo = tvtohz(&tv);
 		} else {
 			timo = 0;
 		}
 
 		error = msleep(ps, &p->p_mtx, PPAUSE|PCATCH, "sigwait", timo);
 
 		if (timeout != NULL) {
 			if (error == ERESTART) {
 				/* Timeout can not be restarted. */
 				error = EINTR;
 			} else if (error == EAGAIN) {
 				/* We will calculate timeout by ourself. */
 				error = 0;
 			}
 		}
 	}
 
 	/* new_block = signals blocked again by restoring the saved mask. */
 	new_block = saved_mask;
 	SIGSETNAND(new_block, td->td_sigmask);
 	td->td_sigmask = saved_mask;
 	/*
 	 * Fewer signals can be delivered to us, reschedule signal
 	 * notification.
 	 */
 	if (p->p_numthreads != 1)
 		reschedule_signals(p, new_block, 0);
 
 	if (error == 0) {
-		SDT_PROBE2(proc, kernel, , signal__clear, sig, ksi);
+		SDT_PROBE2(proc, , , signal__clear, sig, ksi);
 
 		if (ksi->ksi_code == SI_TIMER)
 			itimer_accept(p, ksi->ksi_timerid, ksi);
 
 #ifdef KTRACE
 		if (KTRPOINT(td, KTR_PSIG)) {
 			sig_t action;
 
 			mtx_lock(&ps->ps_mtx);
 			action = ps->ps_sigact[_SIG_IDX(sig)];
 			mtx_unlock(&ps->ps_mtx);
 			ktrpsig(sig, action, &td->td_sigmask, ksi->ksi_code);
 		}
 #endif
 		if (sig == SIGKILL)
 			sigexit(td, sig);
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigpending_args {
 	sigset_t	*set;
 };
 #endif
 /*
  * sigpending system call: copy out the union of the signals pending on
  * the process queue and on the calling thread's queue.
  *
  * Returns the result of copyout().
  */
 int
 sys_sigpending(struct thread *td, struct sigpending_args *uap)
 {
 	struct proc *p = td->td_proc;
 	sigset_t pending;
 
 	/* Snapshot both sets under the process lock, copy out unlocked. */
 	PROC_LOCK(p);
 	pending = p->p_sigqueue.sq_signals;
 	SIGSETOR(pending, td->td_sigqueue.sq_signals);
 	PROC_UNLOCK(p);
 	return (copyout(&pending, uap->set, sizeof(sigset_t)));
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 #ifndef _SYS_SYSPROTO_H_
 struct osigpending_args {
 	int	dummy;
 };
 #endif
 /*
  * Old-style sigpending system call: the union of the process-wide and
  * per-thread pending sets is converted with SIG2OSIG() and returned in
  * td_retval[0].  Always returns 0.
  */
 int
 osigpending(struct thread *td, struct osigpending_args *uap)
 {
 	struct proc *p = td->td_proc;
 	sigset_t pending;
 
 	PROC_LOCK(p);
 	pending = p->p_sigqueue.sq_signals;
 	SIGSETOR(pending, td->td_sigqueue.sq_signals);
 	PROC_UNLOCK(p);
 	SIG2OSIG(pending, td->td_retval[0]);
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43)
 /*
  * Generalized interface signal handler, 4.3-compatible.
  *
  * Translates struct sigvec to and from struct sigaction around a call
  * to kern_sigaction() with KSA_OSIGSET.  Returns EINVAL for signal
  * numbers outside 1..ONSIG-1, otherwise 0 or an errno value from
  * copyin(), kern_sigaction() or copyout().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigvec_args {
 	int	signum;
 	struct	sigvec *nsv;
 	struct	sigvec *osv;
 };
 #endif
 /* ARGSUSED */
 int
 osigvec(struct thread *td, struct osigvec_args *uap)
 {
 	struct sigvec vec;
 	struct sigaction nsa, osa;
 	struct sigaction *nsap, *osap;
 	int error;
 
 	if (uap->signum <= 0 || uap->signum >= ONSIG)
 		return (EINVAL);
 	nsap = (uap->nsv != NULL) ? &nsa : NULL;
 	osap = (uap->osv != NULL) ? &osa : NULL;
 	if (nsap) {
 		error = copyin(uap->nsv, &vec, sizeof(vec));
 		if (error)
 			return (error);
 		nsap->sa_handler = vec.sv_handler;
 		OSIG2SIG(vec.sv_mask, nsap->sa_mask);
 		nsap->sa_flags = vec.sv_flags;
 		nsap->sa_flags ^= SA_RESTART;	/* opposite of SV_INTERRUPT */
 	}
 	error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET);
 	if (osap && !error) {
 		vec.sv_handler = osap->sa_handler;
 		SIG2OSIG(osap->sa_mask, vec.sv_mask);
 		vec.sv_flags = osap->sa_flags;
 		/* SA_NOCLDWAIT is never reported through this interface. */
 		vec.sv_flags &= ~SA_NOCLDWAIT;
 		/* Flip the restart bit back to the sigvec sense. */
 		vec.sv_flags ^= SA_RESTART;
 		error = copyout(&vec, uap->osv, sizeof(vec));
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct osigblock_args {
 	int	mask;
 };
 #endif
 /*
  * Old-style sigblock system call: add the signals in uap->mask to the
  * calling thread's signal mask and return the previous mask, converted
  * with SIG2OSIG(), in td_retval[0].  Always returns 0.
  */
 int
 osigblock(struct thread *td, struct osigblock_args *uap)
 {
 	sigset_t set, oset;
 
 	OSIG2SIG(uap->mask, set);
 	/* SIG_BLOCK with a non-NULL set never takes the EINVAL path. */
 	kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0);
 	SIG2OSIG(oset, td->td_retval[0]);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct osigsetmask_args {
 	int	mask;
 };
 #endif
 /*
  * Old-style sigsetmask system call: replace the calling thread's signal
  * mask with uap->mask and return the previous mask, converted with
  * SIG2OSIG(), in td_retval[0].  Always returns 0.
  */
 int
 osigsetmask(struct thread *td, struct osigsetmask_args *uap)
 {
 	sigset_t set, oset;
 
 	OSIG2SIG(uap->mask, set);
 	/* SIG_SETMASK with a non-NULL set never takes the EINVAL path. */
 	kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0);
 	SIG2OSIG(oset, td->td_retval[0]);
 	return (0);
 }
 #endif /* COMPAT_43 */
 
 /*
  * Suspend calling thread until signal, providing mask to be set in the
  * meantime.
  *
  * Copies the mask in from user space and hands it to kern_sigsuspend().
  * Returns the copyin() error, or whatever kern_sigsuspend() returns.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct sigsuspend_args {
 	const sigset_t *sigmask;
 };
 #endif
 /* ARGSUSED */
 int
 sys_sigsuspend(struct thread *td, struct sigsuspend_args *uap)
 {
 	sigset_t mask;
 	int error;
 
 	error = copyin(uap->sigmask, &mask, sizeof(mask));
 	if (error)
 		return (error);
 	return (kern_sigsuspend(td, mask));
 }
 
 /*
  * Install mask as the signal mask of td and sleep until postsig()
  * reports at least one handled signal.  The previous mask is saved in
  * td_oldsigmask and TDP_OLDMASK is set so it can be restored later.
  *
  * Always returns EJUSTRETURN, with EINTR recorded as the syscall result.
  */
 int
 kern_sigsuspend(struct thread *td, sigset_t mask)
 {
 	struct proc *p = td->td_proc;
 	int has_sig, sig;
 
 	/*
 	 * When returning from sigsuspend, we want
 	 * the old mask to be restored after the
 	 * signal handler has finished.  Thus, we
 	 * save it here and mark the sigacts structure
 	 * to indicate this.
 	 */
 	PROC_LOCK(p);
 	kern_sigprocmask(td, SIG_SETMASK, &mask, &td->td_oldsigmask,
 	    SIGPROCMASK_PROC_LOCKED);
 	td->td_pflags |= TDP_OLDMASK;
 
 	/*
 	 * Process signals now. Otherwise, we can get spurious wakeup
 	 * due to signal entered process queue, but delivered to other
 	 * thread. But sigsuspend should return only on signal
 	 * delivery.
 	 */
 	/* Record EINTR as the result before any signal is posted. */
 	(p->p_sysent->sv_set_syscall_retval)(td, EINTR);
 	for (has_sig = 0; !has_sig;) {
 		/* A zero return from msleep() is a plain wakeup: sleep again. */
 		while (msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "pause",
 			0) == 0)
 			/* void */;
 		thread_suspend_check(0);
 		mtx_lock(&p->p_sigacts->ps_mtx);
 		/* Post every deliverable signal, accumulating postsig() results. */
 		while ((sig = cursig(td)) != 0)
 			has_sig += postsig(sig);
 		mtx_unlock(&p->p_sigacts->ps_mtx);
 	}
 	PROC_UNLOCK(p);
 	td->td_errno = EINTR;
 	td->td_pflags |= TDP_NERRNO;
 	return (EJUSTRETURN);
 }
 
 #ifdef COMPAT_43	/* XXX - COMPAT_FBSD3 */
 /*
  * Compatibility sigsuspend call for old binaries.  Note nonstandard calling
  * convention: libc stub passes mask, not pointer, to save a copyin.
  *
  * Returns whatever kern_sigsuspend() returns.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct osigsuspend_args {
 	osigset_t mask;
 };
 #endif
 /* ARGSUSED */
 int
 osigsuspend(struct thread *td, struct osigsuspend_args *uap)
 {
 	sigset_t mask;
 
 	OSIG2SIG(uap->mask, mask);
 	return (kern_sigsuspend(td, mask));
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct osigstack_args {
 	struct	sigstack *nss;
 	struct	sigstack *oss;
 };
 #endif
 /*
  * Old-style sigstack system call: optionally install a new signal stack
  * pointer from uap->nss and optionally report the previous one through
  * uap->oss.
  *
  * Returns 0 or an errno value from copyin() or copyout().
  */
 /* ARGSUSED */
 int
 osigstack(struct thread *td, struct osigstack_args *uap)
 {
 	struct sigstack nss, oss;
 	int error = 0;
 
 	if (uap->nss != NULL) {
 		error = copyin(uap->nss, &nss, sizeof(nss));
 		if (error)
 			return (error);
 	}
 	/* Capture the old state before it is overwritten below. */
 	oss.ss_sp = td->td_sigstk.ss_sp;
 	oss.ss_onstack = sigonstack(cpu_getstack(td));
 	if (uap->nss != NULL) {
 		td->td_sigstk.ss_sp = nss.ss_sp;
 		/* This interface carries no size; 0 is recorded. */
 		td->td_sigstk.ss_size = 0;
 		td->td_sigstk.ss_flags |= nss.ss_onstack & SS_ONSTACK;
 		td->td_pflags |= TDP_ALTSTACK;
 	}
 	if (uap->oss != NULL)
 		error = copyout(&oss, uap->oss, sizeof(oss));
 
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct sigaltstack_args {
 	stack_t	*ss;
 	stack_t	*oss;
 };
 #endif
 /*
  * sigaltstack system call: copy the new stack description in (when
  * uap->ss is non-NULL), apply it through kern_sigaltstack() and copy the
  * previous description out (when uap->oss is non-NULL).
  *
  * Returns 0 or an errno value from copyin(), kern_sigaltstack() or
  * copyout().
  */
 /* ARGSUSED */
 int
 sys_sigaltstack(struct thread *td, struct sigaltstack_args *uap)
 {
 	stack_t ss, oss;
 	int error;
 
 	if (uap->ss != NULL) {
 		error = copyin(uap->ss, &ss, sizeof(ss));
 		if (error)
 			return (error);
 	}
 	error = kern_sigaltstack(td, (uap->ss != NULL) ? &ss : NULL,
 	    (uap->oss != NULL) ? &oss : NULL);
 	if (error)
 		return (error);
 	if (uap->oss != NULL)
 		error = copyout(&oss, uap->oss, sizeof(stack_t));
 	return (error);
 }
 
 /*
  * Query and/or change the alternate signal stack of td.
  *
  * When oss is non-NULL it receives the current stack description, with
  * ss_flags recomputed as SS_DISABLE, SS_ONSTACK or 0.  When ss is
  * non-NULL it is installed, or the alternate stack is disabled if
  * SS_DISABLE is set in ss->ss_flags.
  *
  * Returns 0, EPERM when currently running on the alternate stack, EINVAL
  * for flags other than SS_DISABLE, or ENOMEM when the stack is smaller
  * than the ABI's sv_minsigstksz.
  */
 int
 kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss)
 {
 	struct proc *p = td->td_proc;
 	int onstack;
 
 	onstack = sigonstack(cpu_getstack(td));
 
 	if (oss != NULL) {
 		*oss = td->td_sigstk;
 		if ((td->td_pflags & TDP_ALTSTACK) == 0)
 			oss->ss_flags = SS_DISABLE;
 		else if (onstack)
 			oss->ss_flags = SS_ONSTACK;
 		else
 			oss->ss_flags = 0;
 	}
 
 	/* Pure query: nothing to install. */
 	if (ss == NULL)
 		return (0);
 
 	if (onstack)
 		return (EPERM);
 	if ((ss->ss_flags & ~SS_DISABLE) != 0)
 		return (EINVAL);
 
 	if ((ss->ss_flags & SS_DISABLE) != 0) {
 		td->td_pflags &= ~TDP_ALTSTACK;
 		return (0);
 	}
 
 	if (ss->ss_size < p->p_sysent->sv_minsigstksz)
 		return (ENOMEM);
 	td->td_sigstk = *ss;
 	td->td_pflags |= TDP_ALTSTACK;
 	return (0);
 }
 
 /*
  * Common code for kill process group/broadcast kill.
  * td is the calling thread.
  *
  * With all set, every process in the system is considered except the
  * caller's own; otherwise the members of process group pgid are
  * considered (pgid 0 selects the caller's own process group).  Processes
  * with pid <= 1, P_SYSTEM processes and processes still in PRS_NEW are
  * always skipped.  A sig of 0 performs the permission checks only.
  *
  * Returns 0 if at least one process passed p_cansignal(), otherwise the
  * first p_cansignal() error seen, or ESRCH if no process (or no such
  * process group) was found.
  */
 static int
 killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi)
 {
 	struct proc *p;
 	struct pgrp *pgrp;
 	int err;
 	int ret;
 
 	ret = ESRCH;
 	if (all) {
 		/*
 		 * broadcast
 		 */
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
 			    p == td->td_proc || p->p_state == PRS_NEW) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			err = p_cansignal(td, p, sig);
 			if (err == 0) {
 				if (sig)
 					pksignal(p, sig, ksi);
 				/* One success is enough to report success. */
 				ret = err;
 			}
 			/* Keep only the first error, and only until a success. */
 			else if (ret == ESRCH)
 				ret = err;
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
 	} else {
 		sx_slock(&proctree_lock);
 		if (pgid == 0) {
 			/*
 			 * zero pgid means send to my process group.
 			 */
 			pgrp = td->td_proc->p_pgrp;
 			PGRP_LOCK(pgrp);
 		} else {
 			/*
 			 * NOTE(review): pgfind() presumably returns the group
 			 * locked, matching the PGRP_UNLOCK() below - confirm.
 			 */
 			pgrp = pgfind(pgid);
 			if (pgrp == NULL) {
 				sx_sunlock(&proctree_lock);
 				return (ESRCH);
 			}
 		}
 		sx_sunlock(&proctree_lock);
 		LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
 			PROC_LOCK(p);
 			if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
 			    p->p_state == PRS_NEW) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			err = p_cansignal(td, p, sig);
 			if (err == 0) {
 				if (sig)
 					pksignal(p, sig, ksi);
 				ret = err;
 			}
 			else if (ret == ESRCH)
 				ret = err;
 			PROC_UNLOCK(p);
 		}
 		PGRP_UNLOCK(pgrp);
 	}
 	return (ret);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct kill_args {
 	int	pid;
 	int	signum;
 };
 #endif
 /* ARGSUSED */
 int
 sys_kill(struct thread *td, struct kill_args *uap)
 {
 	ksiginfo_t ksi;
 	struct proc *p;
 	int error;
 
 	/*
 	 * A process in capability mode can send signals only to himself.
 	 * The main rationale behind this is that abort(3) is implemented as
 	 * kill(getpid(), SIGABRT).
 	 */
 	if (IN_CAPABILITY_MODE(td) && uap->pid != td->td_proc->p_pid)
 		return (ECAPMODE);
 
 	AUDIT_ARG_SIGNUM(uap->signum);
 	AUDIT_ARG_PID(uap->pid);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	ksiginfo_init(&ksi);
 	ksi.ksi_signo = uap->signum;
 	ksi.ksi_code = SI_USER;
 	ksi.ksi_pid = td->td_proc->p_pid;
 	ksi.ksi_uid = td->td_ucred->cr_ruid;
 
 	if (uap->pid > 0) {
 		/* kill single process */
 		if ((p = pfind(uap->pid)) == NULL) {
 			if ((p = zpfind(uap->pid)) == NULL)
 				return (ESRCH);
 		}
 		AUDIT_ARG_PROCESS(p);
 		error = p_cansignal(td, p, uap->signum);
 		if (error == 0 && uap->signum)
 			pksignal(p, uap->signum, &ksi);
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	switch (uap->pid) {
 	case -1:		/* broadcast signal */
 		return (killpg1(td, uap->signum, 0, 1, &ksi));
 	case 0:			/* signal own process group */
 		return (killpg1(td, uap->signum, 0, 0, &ksi));
 	default:		/* negative explicit process group */
 		return (killpg1(td, uap->signum, -uap->pid, 0, &ksi));
 	}
 	/* NOTREACHED */
 }
 
 /*
  * pdkill(2): send signal uap->signum to the process referenced by the
  * process descriptor uap->fd.
  *
  * The descriptor is resolved through procdesc_find() with the CAP_PDKILL
  * right.  A signal number of zero only performs the permission check.
  * The signal is posted with kern_psignal(), i.e. without sender details
  * in the siginfo.
  *
  * Returns 0 on success, EINVAL for an out-of-range signal number, or the
  * error reported by procdesc_find() or p_cansignal().
  */
 int
 sys_pdkill(struct thread *td, struct pdkill_args *uap)
 {
 	struct proc *p;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_SIGNUM(uap->signum);
 	AUDIT_ARG_FD(uap->fd);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	error = procdesc_find(td, uap->fd,
 	    cap_rights_init(&rights, CAP_PDKILL), &p);
 	if (error)
 		return (error);
 	AUDIT_ARG_PROCESS(p);
 	error = p_cansignal(td, p, uap->signum);
 	if (error == 0 && uap->signum)
 		kern_psignal(p, uap->signum);
 	/* The process found above is released here on every path. */
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Argument block consumed by okillpg().  This local definition is only
  * compiled when _SYS_SYSPROTO_H_ is not defined.
  */
 struct okillpg_args {
 	int	pgid;	/* process group id passed on to killpg1() */
 	int	signum;	/* signal number to send */
 };
 #endif
 /* ARGSUSED */
 int
 okillpg(struct thread *td, struct okillpg_args *uap)
 {
 	ksiginfo_t ksi;
 
 	AUDIT_ARG_SIGNUM(uap->signum);
 	AUDIT_ARG_PID(uap->pgid);
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	ksiginfo_init(&ksi);
 	ksi.ksi_signo = uap->signum;
 	ksi.ksi_code = SI_USER;
 	ksi.ksi_pid = td->td_proc->p_pid;
 	ksi.ksi_uid = td->td_ucred->cr_ruid;
 	return (killpg1(td, uap->signum, uap->pgid, 0, &ksi));
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 /*
  * Argument block consumed by sys_sigqueue().  This local definition is
  * only compiled when _SYS_SYSPROTO_H_ is not defined.
  */
 struct sigqueue_args {
 	pid_t pid;	/* target process; must be > 0 */
 	int signum;	/* signal number; 0 performs the checks only */
 	/* union sigval */ void *value;	/* stored into ksi_value.sival_ptr */
 };
 #endif
 int
 sys_sigqueue(struct thread *td, struct sigqueue_args *uap)
 {
 	ksiginfo_t ksi;
 	struct proc *p;
 	int error;
 
 	if ((u_int)uap->signum > _SIG_MAXSIG)
 		return (EINVAL);
 
 	/*
 	 * Specification says sigqueue can only send signal to
 	 * single process.
 	 */
 	if (uap->pid <= 0)
 		return (EINVAL);
 
 	if ((p = pfind(uap->pid)) == NULL) {
 		if ((p = zpfind(uap->pid)) == NULL)
 			return (ESRCH);
 	}
 	error = p_cansignal(td, p, uap->signum);
 	if (error == 0 && uap->signum != 0) {
 		ksiginfo_init(&ksi);
 		ksi.ksi_flags = KSI_SIGQ;
 		ksi.ksi_signo = uap->signum;
 		ksi.ksi_code = SI_QUEUE;
 		ksi.ksi_pid = td->td_proc->p_pid;
 		ksi.ksi_uid = td->td_ucred->cr_ruid;
 		ksi.ksi_value.sival_ptr = uap->value;
 		error = pksignal(p, ksi.ksi_signo, &ksi);
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Send signal sig (with optional siginfo ksi) to every member of the
  * process group identified by pgid.  A pgid of zero, or one that
  * pgfind() cannot resolve, is silently ignored.
  */
 void
 gsignal(int pgid, int sig, ksiginfo_t *ksi)
 {
 	struct pgrp *group;
 
 	if (pgid == 0)
 		return;
 
 	/* The lookup is done under the shared proctree lock. */
 	sx_slock(&proctree_lock);
 	group = pgfind(pgid);
 	sx_sunlock(&proctree_lock);
 	if (group == NULL)
 		return;
 
 	/* pgsignal() asserts the group lock; it is dropped afterwards. */
 	pgsignal(group, sig, 0, ksi);
 	PGRP_UNLOCK(group);
 }
 
 /*
  * Send signal sig to the members of process group pgrp that are in
  * state PRS_NORMAL.  When checkctty is non-zero, only members with
  * P_CONTROLT set (i.e. with a controlling terminal) are signalled.
  * A NULL pgrp is a no-op; otherwise the caller must hold the group lock.
  */
 void
 pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi)
 {
 	struct proc *member;
 
 	if (pgrp == NULL)
 		return;
 
 	PGRP_LOCK_ASSERT(pgrp, MA_OWNED);
 	LIST_FOREACH(member, &pgrp->pg_members, p_pglist) {
 		PROC_LOCK(member);
 		if (member->p_state == PRS_NORMAL &&
 		    (checkctty == 0 || (member->p_flag & P_CONTROLT) != 0))
 			pksignal(member, sig, ksi);
 		PROC_UNLOCK(member);
 	}
 }
 
 
 /*
  * Finish delivery of signal sig to thread td once the usermode frame has
  * been built: account for the signal, block the handler's catch mask
  * (plus sig itself unless it is in ps_signodefer), and reset the
  * disposition when sig is in ps_sigreset.
  *
  * This has to run after the machine-dependent sv_sendsig() routine,
  * because that routine needs the pre-delivery ps_siginfo and signal
  * mask.  The caller holds ps->ps_mtx (asserted).
  */
 static void
 postsig_done(int sig, struct thread *td, struct sigacts *ps)
 {
 	sigset_t blocked;
 
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	td->td_ru.ru_nsignals++;
 
 	/* Signals to hold off while the handler runs. */
 	blocked = ps->ps_catchmask[_SIG_IDX(sig)];
 	if (SIGISMEMBER(ps->ps_signodefer, sig) == 0)
 		SIGADDSET(blocked, sig);
 	kern_sigprocmask(td, SIG_BLOCK, &blocked, NULL,
 	    SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED);
 
 	/* One-shot handlers revert to the default action. */
 	if (SIGISMEMBER(ps->ps_sigreset, sig) != 0)
 		sigdflt(ps, sig);
 }
 
 
 /*
  * Post the trap-generated signal described by ksi to thread td.
  *
  * When the process is not traced, catches the signal and td does not
  * mask it, the handler frame is built right away through sv_sendsig()
  * so the trap code is delivered unchanged.  In every other case the
  * signal is queued through tdsendsignal().
  */
 void
 trapsignal(struct thread *td, ksiginfo_t *ksi)
 {
 	struct sigacts *ps;
 	struct proc *p;
 	int code, sig;
 	int catch_now;
 
 	p = td->td_proc;
 	sig = ksi->ksi_signo;
 	code = ksi->ksi_code;
 	KASSERT(_SIG_VALID(sig), ("invalid signal"));
 
 	PROC_LOCK(p);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	catch_now = (p->p_flag & P_TRACED) == 0 &&
 	    SIGISMEMBER(ps->ps_sigcatch, sig) &&
 	    !SIGISMEMBER(td->td_sigmask, sig);
 
 	if (!catch_now) {
 		/*
 		 * Avoid a possible infinite loop if the thread
 		 * masking the signal or process is ignoring the
 		 * signal.
 		 */
 		if (kern_forcesigexit &&
 		    (SIGISMEMBER(td->td_sigmask, sig) ||
 		     ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) {
 			/* Force the default action for this signal. */
 			SIGDELSET(td->td_sigmask, sig);
 			SIGDELSET(ps->ps_sigcatch, sig);
 			SIGDELSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
 		mtx_unlock(&ps->ps_mtx);
 		p->p_code = code;	/* XXX for core dump/debugger */
 		p->p_sig = sig;		/* XXX to verify code */
 		tdsendsignal(p, td, sig, ksi);
 		PROC_UNLOCK(p);
 		return;
 	}
 
 	/* Immediate delivery to the installed handler. */
 #ifdef KTRACE
 	if (KTRPOINT(curthread, KTR_PSIG))
 		ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)],
 		    &td->td_sigmask, code);
 #endif
 	(*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)],
 	    ksi, &td->td_sigmask);
 	postsig_done(sig, td, ps);
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 }
 
 /*
  * Pick the thread of process p that should receive signal sig.
  *
  * Preference order: the current thread if it belongs to p and does not
  * mask sig; otherwise the first thread of p that does not mask sig;
  * otherwise the first thread of p.  The process lock must be held.
  * The prop argument is not consulted.
  */
 static struct thread *
 sigtd(struct proc *p, int sig, int prop)
 {
 	struct thread *cand;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * Check if current thread can handle the signal without
 	 * switching context to another thread.
 	 */
 	if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig))
 		return (curthread);
 
 	FOREACH_THREAD_IN_PROC(p, cand) {
 		if (!SIGISMEMBER(cand->td_sigmask, sig))
 			return (cand);
 	}
 
 	/* Every thread masks the signal: fall back to the first one. */
 	return (FIRST_THREAD_IN_PROC(p));
 }
 
 /*
  * Send the signal to the process.  If the signal has an action, the action
  * is usually performed by the target process rather than the caller; we add
  * the signal to the set of pending signals for the process.
  *
  * Exceptions:
  *   o When a stop signal is sent to a sleeping process that takes the
  *     default action, the process is stopped without awakening it.
  *   o SIGCONT restarts stopped processes (or puts them back to sleep)
  *     regardless of the signal action (eg, blocked or ignored).
  *
  * Other ignored signals are discarded immediately.
  *
  * NB: This function may be entered from the debugger via the "kill" DDB
  * command.  There is little that can be done to mitigate the possibly messy
  * side effects of this unwise possibility.
  */
 void
 kern_psignal(struct proc *p, int sig)
 {
 	ksiginfo_t info;
 
 	/*
 	 * Kernel-originated signal: build a minimal siginfo tagged
 	 * SI_KERNEL and post it process-wide (no specific thread).  The
 	 * result of tdsendsignal() is deliberately discarded.
 	 */
 	ksiginfo_init(&info);
 	info.ksi_code = SI_KERNEL;
 	info.ksi_signo = sig;
 	(void) tdsendsignal(p, NULL, sig, &info);
 }
 
 /*
  * Post signal sig with caller-supplied siginfo ksi to process p as a
  * whole (no specific thread).  Returns the result of tdsendsignal().
  */
 int
 pksignal(struct proc *p, int sig, ksiginfo_t *ksi)
 {
 	int error;
 
 	error = tdsendsignal(p, NULL, sig, ksi);
 	return (error);
 }
 
 /*
  * Utility function for finding a thread to send a signal event to.
  *
  * For SIGEV_THREAD_ID notifications the thread named by
  * sigev_notify_thread_id is looked up in p with tdfind() and stored in
  * *ttd; ESRCH is returned when it does not exist.  For every other
  * notification type *ttd is set to NULL and p is locked here.
  * NOTE(review): the tdfind() path presumably also returns with the
  * process locked, so both success paths leave p locked -- confirm
  * against tdfind().
  */
 int
 sigev_findtd(struct proc *p ,struct sigevent *sigev, struct thread **ttd)
 {
 	struct thread *found;
 
 	if (sigev->sigev_notify != SIGEV_THREAD_ID) {
 		*ttd = NULL;
 		PROC_LOCK(p);
 		return (0);
 	}
 
 	found = tdfind(sigev->sigev_notify_thread_id, p->p_pid);
 	if (found == NULL)
 		return (ESRCH);
 	*ttd = found;
 	return (0);
 }
 
 /*
  * Post kernel-originated signal sig to the specific thread td.  A
  * minimal siginfo tagged SI_KERNEL is used and the result of
  * tdsendsignal() is discarded.
  */
 void
 tdsignal(struct thread *td, int sig)
 {
 	ksiginfo_t info;
 	struct proc *owner;
 
 	owner = td->td_proc;
 	ksiginfo_init(&info);
 	info.ksi_code = SI_KERNEL;
 	info.ksi_signo = sig;
 	(void) tdsendsignal(owner, td, sig, &info);
 }
 
 /*
  * Post signal sig with caller-supplied siginfo ksi to the specific
  * thread td.  The result of tdsendsignal() is discarded.
  */
 void
 tdksignal(struct thread *td, int sig, ksiginfo_t *ksi)
 {
 	struct proc *owner;
 
 	owner = td->td_proc;
 	(void) tdsendsignal(owner, td, sig, ksi);
 }
 
 /*
  * Post signal sig, described by ksi, to process p.  When td is non-NULL
  * the signal is queued on that thread (which must belong to p);
  * otherwise a thread is chosen with sigtd() and the signal is queued on
  * the process-wide queue.
  *
  * The caller must hold the process lock (asserted); an invalid signal
  * number panics.
  *
  * Returns 0, or the error from sigqueue_add() when the signal cannot be
  * queued.  The following cases drop the signal and still return 0:
  * the target is a zombie, the signal is in ps_sigignore, or it is a tty
  * stop signal with default action sent to a process whose group has
  * pg_jobc == 0.  In those cases a ksi carrying KSI_INS is handed to
  * ksiginfo_tryfree().
  */
 int
 tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi)
 {
 	sig_t action;
 	sigqueue_t *sigqueue;
 	int prop;
 	struct sigacts *ps;
 	int intrval;
 	int ret = 0;
 	int wakeup_swapper;
 
 	MPASS(td == NULL || p == td->td_proc);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	if (!_SIG_VALID(sig))
 		panic("%s(): invalid signal %d", __func__, sig);
 
 	KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("%s: ksi on queue", __func__));
 
 	/*
 	 * IEEE Std 1003.1-2001: return success when killing a zombie.
 	 */
 	if (p->p_state == PRS_ZOMBIE) {
 		if (ksi && (ksi->ksi_flags & KSI_INS))
 			ksiginfo_tryfree(ksi);
 		return (ret);
 	}
 
 	ps = p->p_sigacts;
 	KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig);
 	prop = sigprop(sig);
 
 	/*
 	 * No thread named by the caller: pick one with sigtd() and use the
 	 * process-wide queue.  Otherwise use the named thread's own queue.
 	 */
 	if (td == NULL) {
 		td = sigtd(p, sig, prop);
 		sigqueue = &p->p_sigqueue;
 	} else
 		sigqueue = &td->td_sigqueue;
 
-	SDT_PROBE3(proc, kernel, , signal__send, td, p, sig);
+	SDT_PROBE3(proc, , , signal__send, td, p, sig);
 
 	/*
 	 * If the signal is being ignored,
 	 * then we forget about it immediately.
 	 * (Note: we don't set SIGCONT in ps_sigignore,
 	 * and if it is set to SIG_IGN,
 	 * action will be SIG_DFL here.)
 	 */
 	mtx_lock(&ps->ps_mtx);
 	if (SIGISMEMBER(ps->ps_sigignore, sig)) {
-		SDT_PROBE3(proc, kernel, , signal__discard, td, p, sig);
+		SDT_PROBE3(proc, , , signal__discard, td, p, sig);
 
 		mtx_unlock(&ps->ps_mtx);
 		if (ksi && (ksi->ksi_flags & KSI_INS))
 			ksiginfo_tryfree(ksi);
 		return (ret);
 	}
 	/*
 	 * Classify the disposition as seen by the chosen thread (held,
 	 * caught, or default) and pick the value used to abort an
 	 * interruptible sleep.
 	 */
 	if (SIGISMEMBER(td->td_sigmask, sig))
 		action = SIG_HOLD;
 	else if (SIGISMEMBER(ps->ps_sigcatch, sig))
 		action = SIG_CATCH;
 	else
 		action = SIG_DFL;
 	if (SIGISMEMBER(ps->ps_sigintr, sig))
 		intrval = EINTR;
 	else
 		intrval = ERESTART;
 	mtx_unlock(&ps->ps_mtx);
 
 	if (prop & SA_CONT)
 		sigqueue_delete_stopmask_proc(p);
 	else if (prop & SA_STOP) {
 		/*
 		 * If sending a tty stop signal to a member of an orphaned
 		 * process group, discard the signal here if the action
 		 * is default; don't stop the process below if sleeping,
 		 * and don't clear any pending SIGCONT.
 		 */
 		if ((prop & SA_TTYSTOP) &&
 		    (p->p_pgrp->pg_jobc == 0) &&
 		    (action == SIG_DFL)) {
 			if (ksi && (ksi->ksi_flags & KSI_INS))
 				ksiginfo_tryfree(ksi);
 			return (ret);
 		}
 		sigqueue_delete_proc(p, SIGCONT);
 		if (p->p_flag & P_CONTINUED) {
 			p->p_flag &= ~P_CONTINUED;
 			PROC_LOCK(p->p_pptr);
 			sigqueue_take(p->p_ksi);
 			PROC_UNLOCK(p->p_pptr);
 		}
 	}
 
 	/* Queue the signal; a failure here is reported to the caller. */
 	ret = sigqueue_add(sigqueue, sig, ksi);
 	if (ret != 0)
 		return (ret);
 	signotify(td);
 	/*
 	 * Defer further processing for signals which are held,
 	 * except that stopped processes must be continued by SIGCONT.
 	 */
 	if (action == SIG_HOLD &&
 	    !((prop & SA_CONT) && (p->p_flag & P_STOPPED_SIG)))
 		return (ret);
 	/*
 	 * SIGKILL: Remove procfs STOPEVENTs.
 	 */
 	if (sig == SIGKILL) {
 		/* from procfs_ioctl.c: PIOCBIC */
 		p->p_stops = 0;
 		/* from procfs_ioctl.c: PIOCCONT */
 		p->p_step = 0;
 		wakeup(&p->p_step);
 	}
 	/*
 	 * Some signals have a process-wide effect and a per-thread
 	 * component.  Most processing occurs when the process next
 	 * tries to cross the user boundary, however there are some
 	 * times when processing needs to be done immediately, such as
 	 * waking up threads so that they can cross the user boundary.
 	 * We try to do the per-process part here.
 	 */
 	if (P_SHOULDSTOP(p)) {
 		KASSERT(!(p->p_flag & P_WEXIT),
 		    ("signal to stopped but exiting process"));
 		if (sig == SIGKILL) {
 			/*
 			 * If traced process is already stopped,
 			 * then no further action is necessary.
 			 */
 			if (p->p_flag & P_TRACED)
 				goto out;
 			/*
 			 * SIGKILL sets process running.
 			 * It will die elsewhere.
 			 * All threads must be restarted.
 			 */
 			p->p_flag &= ~P_STOPPED_SIG;
 			goto runfast;
 		}
 
 		if (prop & SA_CONT) {
 			/*
 			 * If traced process is already stopped,
 			 * then no further action is necessary.
 			 */
 			if (p->p_flag & P_TRACED)
 				goto out;
 			/*
 			 * If SIGCONT is default (or ignored), we continue the
 			 * process but don't leave the signal in sigqueue as
 			 * it has no further action.  If SIGCONT is held, we
 			 * continue the process and leave the signal in
 			 * sigqueue.  If the process catches SIGCONT, let it
 			 * handle the signal itself.  If it isn't waiting on
 			 * an event, it goes back to run state.
 			 * Otherwise, process goes back to sleep state.
 			 */
 			p->p_flag &= ~P_STOPPED_SIG;
 			PROC_SLOCK(p);
 			if (p->p_numthreads == p->p_suspcount) {
 				/*
 				 * The spin lock is dropped around the
 				 * parent notification and retaken after.
 				 */
 				PROC_SUNLOCK(p);
 				p->p_flag |= P_CONTINUED;
 				p->p_xsig = SIGCONT;
 				PROC_LOCK(p->p_pptr);
 				childproc_continued(p);
 				PROC_UNLOCK(p->p_pptr);
 				PROC_SLOCK(p);
 			}
 			if (action == SIG_DFL) {
 				thread_unsuspend(p);
 				PROC_SUNLOCK(p);
 				sigqueue_delete(sigqueue, sig);
 				goto out;
 			}
 			if (action == SIG_CATCH) {
 				/*
 				 * The process wants to catch it so it needs
 				 * to run at least one thread, but which one?
 				 */
 				PROC_SUNLOCK(p);
 				goto runfast;
 			}
 			/*
 			 * The signal is not ignored or caught.
 			 */
 			thread_unsuspend(p);
 			PROC_SUNLOCK(p);
 			goto out;
 		}
 
 		if (prop & SA_STOP) {
 			/*
 			 * If traced process is already stopped,
 			 * then no further action is necessary.
 			 */
 			if (p->p_flag & P_TRACED)
 				goto out;
 			/*
 			 * Already stopped, don't need to stop again
 			 * (If we did the shell could get confused).
 			 * Just make sure the signal STOP bit set.
 			 */
 			p->p_flag |= P_STOPPED_SIG;
 			sigqueue_delete(sigqueue, sig);
 			goto out;
 		}
 
 		/*
 		 * All other kinds of signals:
 		 * If a thread is sleeping interruptibly, simulate a
 		 * wakeup so that when it is continued it will be made
 		 * runnable and can look at the signal.  However, don't make
 		 * the PROCESS runnable, leave it stopped.
 		 * It may run a bit until it hits a thread_suspend_check().
 		 */
 		wakeup_swapper = 0;
 		PROC_SLOCK(p);
 		thread_lock(td);
 		if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR))
 			wakeup_swapper = sleepq_abort(td, intrval);
 		thread_unlock(td);
 		PROC_SUNLOCK(p);
 		if (wakeup_swapper)
 			kick_proc0();
 		goto out;
 		/*
 		 * Mutexes are short lived. Threads waiting on them will
 		 * hit thread_suspend_check() soon.
 		 */
 	} else if (p->p_state == PRS_NORMAL) {
 		if (p->p_flag & P_TRACED || action == SIG_CATCH) {
 			tdsigwakeup(td, sig, action, intrval);
 			goto out;
 		}
 
 		MPASS(action == SIG_DFL);
 
 		if (prop & SA_STOP) {
 			if (p->p_flag & (P_PPWAIT|P_WEXIT))
 				goto out;
 			p->p_flag |= P_STOPPED_SIG;
 			p->p_xsig = sig;
 			PROC_SLOCK(p);
 			sig_suspend_threads(td, p, 1);
 			if (p->p_numthreads == p->p_suspcount) {
 				/*
 				 * only thread sending signal to another
 				 * process can reach here, if thread is sending
 				 * signal to its process, because thread does
 				 * not suspend itself here, p_numthreads
 				 * should never be equal to p_suspcount.
 				 */
 				thread_stopped(p);
 				PROC_SUNLOCK(p);
 				sigqueue_delete_proc(p, p->p_xsig);
 			} else
 				PROC_SUNLOCK(p);
 			goto out;
 		}
 	} else {
 		/* Not in "NORMAL" state. discard the signal. */
 		sigqueue_delete(sigqueue, sig);
 		goto out;
 	}
 
 	/*
 	 * The process is not stopped so we need to apply the signal to all the
 	 * running threads.
 	 */
 runfast:
 	tdsigwakeup(td, sig, action, intrval);
 	PROC_SLOCK(p);
 	thread_unsuspend(p);
 	PROC_SUNLOCK(p);
 out:
 	/* If we jump here, proc slock should not be owned. */
 	PROC_SLOCK_ASSERT(p, MA_NOTOWNED);
 	return (ret);
 }
 
 /*
  * The force of a signal has been directed against a single
  * thread.  We need to see what we can do about knocking it
  * out of any sleep it may be in etc.
  *
  * td is the target thread, sig the signal, action the disposition the
  * caller determined for it, and intrval the value handed to
  * sleepq_abort() when an interruptible sleep is aborted.
  *
  * The caller must hold the process lock (asserted).  The process spin
  * lock and td's thread lock are taken here and released on every exit.
  */
 static void
 tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
 {
 	struct proc *p = td->td_proc;
 	register int prop;
 	int wakeup_swapper;
 
 	wakeup_swapper = 0;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	prop = sigprop(sig);
 
 	PROC_SLOCK(p);
 	thread_lock(td);
 	/*
 	 * Bring the priority of a thread up if we want it to get
 	 * killed in this lifetime.  Be careful to avoid bumping the
 	 * priority of the idle thread, since we still allow to signal
 	 * kernel processes.
 	 */
 	if (action == SIG_DFL && (prop & SA_KILL) != 0 &&
 	    td->td_priority > PUSER && !TD_IS_IDLETHREAD(td))
 		sched_prio(td, PUSER);
 	if (TD_ON_SLEEPQ(td)) {
 		/*
 		 * If thread is sleeping uninterruptibly
 		 * we can't interrupt the sleep... the signal will
 		 * be noticed when the process returns through
 		 * trap() or syscall().
 		 */
 		if ((td->td_flags & TDF_SINTR) == 0)
 			goto out;
 		/*
 		 * If SIGCONT is default (or ignored) and process is
 		 * asleep, we are finished; the process should not
 		 * be awakened.
 		 */
 		if ((prop & SA_CONT) && action == SIG_DFL) {
 			/*
 			 * Both locks are dropped here before the queues
 			 * are touched; this path returns directly instead
 			 * of going through the "out" label.
 			 */
 			thread_unlock(td);
 			PROC_SUNLOCK(p);
 			sigqueue_delete(&p->p_sigqueue, sig);
 			/*
 			 * It may be on either list in this state.
 			 * Remove from both for now.
 			 */
 			sigqueue_delete(&td->td_sigqueue, sig);
 			return;
 		}
 
 		/*
 		 * Don't awaken a sleeping thread for SIGSTOP if the
 		 * STOP signal is deferred.
 		 */
 		if ((prop & SA_STOP) && (td->td_flags & TDF_SBDRY))
 			goto out;
 
 		/*
 		 * Give low priority threads a better chance to run.
 		 */
 		if (td->td_priority > PUSER && !TD_IS_IDLETHREAD(td))
 			sched_prio(td, PUSER);
 
 		wakeup_swapper = sleepq_abort(td, intrval);
 	} else {
 		/*
 		 * Other states do nothing with the signal immediately,
 		 * other than kicking ourselves if we are running.
 		 * It will either never be noticed, or noticed very soon.
 		 */
 #ifdef SMP
 		if (TD_IS_RUNNING(td) && td != curthread)
 			forward_signal(td);
 #endif
 	}
 out:
 	/* Common exit: release both locks, then act on sleepq_abort()'s result. */
 	PROC_SUNLOCK(p);
 	thread_unlock(td);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Ask every thread of process p to suspend.
  *
  * Each thread gets TDF_ASTPENDING and TDF_NEEDSUSPCHK set.  Threads that
  * are sleeping (or swapped) with TDF_SINTR set are suspended on the spot
  * with thread_suspend_one(), unless they carry TDF_SBDRY.  On SMP, other
  * not-yet-suspended threads that are running, except td, are sent
  * forward_signal().
  *
  * The caller must hold both the process lock and the process spin lock
  * (asserted).  td is the thread on whose behalf the request is made;
  * sending is consulted only for the conditional flag update below.
  */
 static void
 sig_suspend_threads(struct thread *td, struct proc *p, int sending)
 {
 	struct thread *td2;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 
 	FOREACH_THREAD_IN_PROC(p, td2) {
 		thread_lock(td2);
 		td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 		if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) &&
 		    (td2->td_flags & TDF_SINTR)) {
 			if (td2->td_flags & TDF_SBDRY) {
 				/*
 				 * Once a thread is asleep with
 				 * TDF_SBDRY set, it should never
 				 * become suspended due to this check.
 				 */
 				KASSERT(!TD_IS_SUSPENDED(td2),
 				    ("thread with deferred stops suspended"));
 			} else if (!TD_IS_SUSPENDED(td2)) {
 				thread_suspend_one(td2);
 			}
 		} else if (!TD_IS_SUSPENDED(td2)) {
 			/*
 			 * NOTE(review): TDF_ASTPENDING was already set
 			 * unconditionally at the top of the loop body, so
 			 * this conditional set looks redundant -- confirm
 			 * before relying on "sending" having an effect.
 			 */
 			if (sending || td != td2)
 				td2->td_flags |= TDF_ASTPENDING;
 #ifdef SMP
 			if (TD_IS_RUNNING(td2) && td2 != td)
 				forward_signal(td2);
 #endif
 		}
 		thread_unlock(td2);
 	}
 }
 
 /*
  * Stop thread td of a traced process so that signal sig can be reported.
  *
  * sig is recorded in td->td_xsig and TDB_XSIG is set; the thread then
  * suspends itself, repeatedly if needed, for as long as the process has
  * P_TRACED set and TDB_XSIG remains set on the thread.
  *
  * The caller must hold the process lock (asserted) and the process must
  * not be exiting.  Returns sig unchanged when P_SINGLE_EXIT is seen at
  * the top of the loop; otherwise returns td->td_xsig as it stands when
  * the loop ends.
  * NOTE(review): td_xsig is presumably rewritten by the debugger while
  * the thread is suspended -- confirm against the ptrace code.
  */
 int
 ptracestop(struct thread *td, int sig)
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(!(p->p_flag & P_WEXIT), ("Stopping exiting process"));
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 	    &p->p_mtx.lock_object, "Stopping for traced signal");
 
 	td->td_dbgflags |= TDB_XSIG;
 	td->td_xsig = sig;
 	CTR4(KTR_PTRACE, "ptracestop: tid %d (pid %d) flags %#x sig %d",
 	    td->td_tid, p->p_pid, td->td_dbgflags, sig);
 	PROC_SLOCK(p);
 	while ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_XSIG)) {
 		if (p->p_flag & P_SINGLE_EXIT) {
 			td->td_dbgflags &= ~TDB_XSIG;
 			PROC_SUNLOCK(p);
 			return (sig);
 		}
 		/*
 		 * Just make wait() to work, the last stopped thread
 		 * will win.
 		 */
 		p->p_xsig = sig;
 		p->p_xthread = td;
 		p->p_flag |= (P_STOPPED_SIG|P_STOPPED_TRACE);
 		sig_suspend_threads(td, p, 0);
 		if ((td->td_dbgflags & TDB_STOPATFORK) != 0) {
 			td->td_dbgflags &= ~TDB_STOPATFORK;
 			cv_broadcast(&p->p_dbgwait);
 		}
 stopme:
 		thread_suspend_switch(td, p);
 		if (p->p_xthread == td)
 			p->p_xthread = NULL;
 		if (!(p->p_flag & P_TRACED))
 			break;
 		/*
 		 * Still asked to stay suspended: go back to sleep without
 		 * repeating the bookkeeping above, unless the process is
 		 * being single-threaded for exit.
 		 */
 		if (td->td_dbgflags & TDB_SUSPEND) {
 			if (p->p_flag & P_SINGLE_EXIT)
 				break;
 			goto stopme;
 		}
 	}
 	PROC_SUNLOCK(p);
 	return (td->td_xsig);
 }
 
 /*
  * For every signal that is both in "block" and pending on the process
  * (p_siglist), pick a thread with sigtd(), notify it, and -- when the
  * process is traced or catches the signal -- try to wake it through
  * tdsigwakeup().
  *
  * The process lock must be held.  flags tells whether the caller already
  * holds ps_mtx (SIGPROCMASK_PS_LOCKED); if not, it is taken around the
  * disposition check for each signal.
  */
 static void
 reschedule_signals(struct proc *p, sigset_t block, int flags)
 {
 	struct sigacts *ps;
 	struct thread *td;
 	int intrval, ps_locked, sig;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	ps = p->p_sigacts;
 	ps_locked = (flags & SIGPROCMASK_PS_LOCKED) != 0;
 	mtx_assert(&ps->ps_mtx, ps_locked ? MA_OWNED : MA_NOTOWNED);
 
 	if (SIGISEMPTY(p->p_siglist))
 		return;
 
 	/* Only signals actually pending on the process are of interest. */
 	SIGSETAND(block, p->p_siglist);
 	for (sig = sig_ffs(&block); sig != 0; sig = sig_ffs(&block)) {
 		SIGDELSET(block, sig);
 		td = sigtd(p, sig, 0);
 		signotify(td);
 		if (!ps_locked)
 			mtx_lock(&ps->ps_mtx);
 		if ((p->p_flag & P_TRACED) != 0 ||
 		    SIGISMEMBER(ps->ps_sigcatch, sig)) {
 			intrval = SIGISMEMBER(ps->ps_sigintr, sig) ?
 			    EINTR : ERESTART;
 			tdsigwakeup(td, sig, SIG_CATCH, intrval);
 		}
 		if (!ps_locked)
 			mtx_unlock(&ps->ps_mtx);
 	}
 }
 
 /*
  * Signal-state teardown for thread td: flush its private signal queue
  * and, when other threads remain in the process, block every signal in
  * td and hand the ones it previously had unblocked to
  * reschedule_signals().  The process lock must be held.
  */
 void
 tdsigcleanup(struct thread *td)
 {
 	struct proc *p;
 	sigset_t was_open;
 
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	sigqueue_flush(&td->td_sigqueue);
 	if (p->p_numthreads != 1) {
 		/*
 		 * Since we cannot handle signals, notify signal post code
 		 * about this by filling the sigmask.
 		 *
 		 * Also, if needed, wake up thread(s) that do not block the
 		 * same signals as the exiting thread, since the thread might
 		 * have been selected for delivery and woken up.
 		 */
 		SIGFILLSET(was_open);
 		SIGSETNAND(was_open, td->td_sigmask);
 		SIGFILLSET(td->td_sigmask);
 		reschedule_signals(p, was_open, 0);
 	}
 }
 
 /*
  * Start deferring delivery of stop signals for the current thread by
  * setting TDF_SBDRY.  Returns 1 when this call turned deferral on and 0
  * when it was already in effect.
  */
 int
 sigdeferstop(void)
 {
 	struct thread *self;
 
 	self = curthread;
 	if ((self->td_flags & TDF_SBDRY) == 0) {
 		thread_lock(self);
 		self->td_flags |= TDF_SBDRY;
 		thread_unlock(self);
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Stop deferring delivery of stop signals for the current thread by
  * clearing TDF_SBDRY.  This does not immediately suspend if a stop was
  * posted.  Instead, the thread will suspend either via ast() or a
  * subsequent interruptible sleep.
  *
  * Returns non-zero when deferral was in effect before the call.
  */
 int
 sigallowstop(void)
 {
 	struct thread *self;
 	int was_deferred;
 
 	self = curthread;
 	thread_lock(self);
 	was_deferred = (self->td_flags & TDF_SBDRY) != 0;
 	self->td_flags &= ~TDF_SBDRY;
 	thread_unlock(self);
 	return (was_deferred);
 }
 
 /*
  * If the current process has received a signal (should be caught or cause
  * termination, should interrupt current syscall), return the signal number.
  * Stop signals with default action are processed immediately, then cleared;
  * they aren't returned.  This is checked after each entry to the system for
  * a syscall or trap (though this can usually be done without calling issignal
  * by checking the pending signal masks in cursig.) The normal call
  * sequence is
  *
  *	while (sig = cursig(curthread))
  *		postsig(sig);
  *
  * The candidates are the signals pending on td or on its process that td
  * does not mask.  Returns 0 when none of them needs to be acted upon.
  * The caller must hold both the process lock and ps_mtx (asserted);
  * ps_mtx is dropped and retaken around stopevent(), ptracestop() and
  * the default stop handling.
  */
 static int
 issignal(struct thread *td)
 {
 	struct proc *p;
 	struct sigacts *ps;
 	struct sigqueue *queue;
 	sigset_t sigpending;
 	int sig, prop, newsig;
 
 	p = td->td_proc;
 	ps = p->p_sigacts;
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	for (;;) {
 		int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG);
 
 		/* Pending on the thread or the process, minus what td masks. */
 		sigpending = td->td_sigqueue.sq_signals;
 		SIGSETOR(sigpending, p->p_sigqueue.sq_signals);
 		SIGSETNAND(sigpending, td->td_sigmask);
 
 		/* Stop signals are not considered while stops are deferred. */
 		if (p->p_flag & P_PPWAIT || td->td_flags & TDF_SBDRY)
 			SIG_STOPSIGMASK(sigpending);
 		if (SIGISEMPTY(sigpending))	/* no signal to send */
 			return (0);
 		sig = sig_ffs(&sigpending);
 
 		if (p->p_stops & S_SIG) {
 			mtx_unlock(&ps->ps_mtx);
 			stopevent(p, S_SIG, sig);
 			mtx_lock(&ps->ps_mtx);
 		}
 
 		/*
 		 * We should see pending but ignored signals
 		 * only if P_TRACED was on when they were posted.
 		 */
 		if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) {
 			sigqueue_delete(&td->td_sigqueue, sig);
 			sigqueue_delete(&p->p_sigqueue, sig);
 			continue;
 		}
 		if (p->p_flag & P_TRACED && (p->p_flag & P_PPTRACE) == 0) {
 			/*
 			 * If traced, always stop.
 			 * Remove old signal from queue before the stop.
 			 * XXX shrug off debugger, it causes siginfo to
 			 * be thrown away.
 			 */
 			queue = &td->td_sigqueue;
 			td->td_dbgksi.ksi_signo = 0;
 			if (sigqueue_get(queue, sig, &td->td_dbgksi) == 0) {
 				queue = &p->p_sigqueue;
 				sigqueue_get(queue, sig, &td->td_dbgksi);
 			}
 
 			mtx_unlock(&ps->ps_mtx);
 			newsig = ptracestop(td, sig);
 			mtx_lock(&ps->ps_mtx);
 
 			if (sig != newsig) {
 
 				/*
 				 * If parent wants us to take the signal,
 				 * then it will leave it in p->p_xsig;
 				 * otherwise we just look for signals again.
 				*/
 				if (newsig == 0)
 					continue;
 				sig = newsig;
 
 				/*
 				 * Put the new signal into td_sigqueue. If the
 				 * signal is being masked, look for other
 				 * signals.
 				 */
 				sigqueue_add(queue, sig, NULL);
 				if (SIGISMEMBER(td->td_sigmask, sig))
 					continue;
 				signotify(td);
 			} else {
 				/*
 				 * Same signal: put it back, preferring the
 				 * saved siginfo at the head of the queue and
 				 * falling back to a bare entry if that fails
 				 * or no siginfo was captured.
 				 */
 				if (td->td_dbgksi.ksi_signo != 0) {
 					td->td_dbgksi.ksi_flags |= KSI_HEAD;
 					if (sigqueue_add(&td->td_sigqueue, sig,
 					    &td->td_dbgksi) != 0)
 						td->td_dbgksi.ksi_signo = 0;
 				}
 				if (td->td_dbgksi.ksi_signo == 0)
 					sigqueue_add(&td->td_sigqueue, sig,
 					    NULL);
 			}
 
 			/*
 			 * If the traced bit got turned off, go back up
 			 * to the top to rescan signals.  This ensures
 			 * that p_sig* and p_sigact are consistent.
 			 */
 			if ((p->p_flag & P_TRACED) == 0)
 				continue;
 		}
 
 		prop = sigprop(sig);
 
 		/*
 		 * Decide whether the signal should be returned.
 		 * Return the signal's number, or fall through
 		 * to clear it from the pending mask.
 		 */
 		switch ((intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]) {
 
 		case (intptr_t)SIG_DFL:
 			/*
 			 * Don't take default actions on system processes.
 			 */
 			if (p->p_pid <= 1) {
 #ifdef DIAGNOSTIC
 				/*
 				 * Are you sure you want to ignore SIGSEGV
 				 * in init? XXX
 				 */
 				printf("Process (pid %lu) got signal %d\n",
 					(u_long)p->p_pid, sig);
 #endif
 				break;		/* == ignore */
 			}
 			/*
 			 * If there is a pending stop signal to process
 			 * with default action, stop here,
 			 * then clear the signal.  However,
 			 * if process is member of an orphaned
 			 * process group, ignore tty stop signals.
 			 */
 			if (prop & SA_STOP) {
 				if (p->p_flag & (P_TRACED|P_WEXIT) ||
 				    (p->p_pgrp->pg_jobc == 0 &&
 				     prop & SA_TTYSTOP))
 					break;	/* == ignore */
 				mtx_unlock(&ps->ps_mtx);
 				WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK,
 				    &p->p_mtx.lock_object, "Catching SIGSTOP");
 				p->p_flag |= P_STOPPED_SIG;
 				p->p_xsig = sig;
 				PROC_SLOCK(p);
 				sig_suspend_threads(td, p, 0);
 				thread_suspend_switch(td, p);
 				PROC_SUNLOCK(p);
 				mtx_lock(&ps->ps_mtx);
 				break;
 			} else if (prop & SA_IGNORE) {
 				/*
 				 * Except for SIGCONT, shouldn't get here.
 				 * Default action is to ignore; drop it.
 				 */
 				break;		/* == ignore */
 			} else
 				return (sig);
 			/*NOTREACHED*/
 
 		case (intptr_t)SIG_IGN:
 			/*
 			 * Masking above should prevent us ever trying
 			 * to take action on an ignored signal other
 			 * than SIGCONT, unless process is traced.
 			 */
 			if ((prop & SA_CONT) == 0 &&
 			    (p->p_flag & P_TRACED) == 0)
 				printf("issignal\n");
 			break;		/* == ignore */
 
 		default:
 			/*
 			 * This signal has an action, let
 			 * postsig() process it.
 			 */
 			return (sig);
 		}
 		/* Reached via "break" above: drop the signal from both queues. */
 		sigqueue_delete(&td->td_sigqueue, sig);	/* take the signal! */
 		sigqueue_delete(&p->p_sigqueue, sig);
 	}
 	/* NOTREACHED */
 }
 
 /*
  * If process p is marked P_STOPPED_SIG and all of its threads are
  * accounted for as suspended, clear P_WAITED and report the stop to the
  * parent via childproc_stopped() (CLD_TRAPPED when traced, CLD_STOPPED
  * otherwise).
  *
  * Called with the process lock and the process spin lock held (both
  * asserted).  The spin lock is dropped around the parent notification
  * and held again on return.
  */
 void
 thread_stopped(struct proc *p)
 {
 	int stopped;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 
 	/* One extra thread is counted when p is the current process. */
 	stopped = p->p_suspcount + (p == curproc ? 1 : 0);
 	if ((p->p_flag & P_STOPPED_SIG) == 0 || stopped != p->p_numthreads)
 		return;
 
 	PROC_SUNLOCK(p);
 	p->p_flag &= ~P_WAITED;
 	PROC_LOCK(p->p_pptr);
 	childproc_stopped(p, (p->p_flag & P_TRACED) != 0 ?
 	    CLD_TRAPPED : CLD_STOPPED);
 	PROC_UNLOCK(p->p_pptr);
 	PROC_SLOCK(p);
 }
 
 /*
  * Take the action for the specified signal
  * from the current set of pending signals.
  *
  * sig is removed from the current thread's queue or, failing that, from
  * the process queue.  With the default disposition the process is
  * terminated through sigexit() and this function does not return.
  * Otherwise the handler frame is built with sv_sendsig() and
  * postsig_done() updates the mask and disposition.
  *
  * The caller must hold the process lock and ps_mtx (both asserted).
  * Returns 0 when sig was not found on either queue, 1 once the signal
  * has been delivered to its handler.
  */
 int
 postsig(int sig)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct sigacts *ps;
 	sig_t action;
 	ksiginfo_t ksi;
 	sigset_t returnmask;
 
 	KASSERT(sig != 0, ("postsig"));
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	ps = p->p_sigacts;
 	mtx_assert(&ps->ps_mtx, MA_OWNED);
 	ksiginfo_init(&ksi);
 	/* Thread queue first, then the process queue. */
 	if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 &&
 	    sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0)
 		return (0);
 	ksi.ksi_signo = sig;
 	if (ksi.ksi_code == SI_TIMER)
 		itimer_accept(p, ksi.ksi_timerid, &ksi);
 	action = ps->ps_sigact[_SIG_IDX(sig)];
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_PSIG))
 		ktrpsig(sig, action, td->td_pflags & TDP_OLDMASK ?
 		    &td->td_oldsigmask : &td->td_sigmask, ksi.ksi_code);
 #endif
 	if (p->p_stops & S_SIG) {
 		mtx_unlock(&ps->ps_mtx);
 		stopevent(p, S_SIG, sig);
 		mtx_lock(&ps->ps_mtx);
 	}
 
 	if (action == SIG_DFL) {
 		/*
 		 * Default action, where the default is to kill
 		 * the process.  (Other cases were ignored above.)
 		 */
 		mtx_unlock(&ps->ps_mtx);
 		sigexit(td, sig);
 		/* NOTREACHED */
 	} else {
 		/*
 		 * If we get here, the signal must be caught.
 		 */
 		KASSERT(action != SIG_IGN && !SIGISMEMBER(td->td_sigmask, sig),
 		    ("postsig action"));
 		/*
 		 * Set the new mask value and also defer further
 		 * occurrences of this signal.
 		 *
 		 * Special case: user has done a sigsuspend.  Here the
 		 * current mask is not of interest, but rather the
 		 * mask from before the sigsuspend is what we want
 		 * restored after the signal processing is completed.
 		 */
 		if (td->td_pflags & TDP_OLDMASK) {
 			returnmask = td->td_oldsigmask;
 			td->td_pflags &= ~TDP_OLDMASK;
 		} else
 			returnmask = td->td_sigmask;
 
 		/* The trap details recorded for this signal are now stale. */
 		if (p->p_sig == sig) {
 			p->p_code = 0;
 			p->p_sig = 0;
 		}
 		(*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask);
 		postsig_done(sig, td, ps);
 	}
 	return (1);
 }
 
 /*
  * Kill process p for the stated reason.
  *
  * The event is traced and logged at LOG_ERR together with "why", the
  * process is flagged P_WKILLED, and SIGKILL is posted with
  * kern_psignal().  The caller must hold the process lock (asserted).
  */
 void
 killproc(struct proc *p, char *why)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)", p, p->p_pid,
 	    p->p_comm);
 	/* A process without credentials is logged with uid -1. */
 	log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid,
 	    p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why);
 	p->p_flag |= P_WKILLED;
 	kern_psignal(p, SIGKILL);
 }
 
 /*
  * Force the current process to exit with the specified signal, dumping core
  * if appropriate.  We bypass the normal tests for masked and caught signals,
  * allowing unrecoverable failures to terminate the process without changing
  * signal state.  Mark the accounting record with the signal termination.
  * If dumping core, save the signal number for the debugger.  Calls exit1()
  * and does not return.
  *
  * The caller must hold the process lock (asserted).  It is released
  * either by coredump() or explicitly on the no-dump path before exit1()
  * is called.
  */
 void
 sigexit(struct thread *td, int sig)
 {
 	struct proc *p = td->td_proc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_acflag |= AXSIG;
 	/*
 	 * We must be single-threading to generate a core dump.  This
 	 * ensures that the registers in the core file are up-to-date.
 	 * Also, the ELF dump handler assumes that the thread list doesn't
 	 * change out from under it.
 	 *
 	 * XXX If another thread attempts to single-thread before us
 	 *     (e.g. via fork()), we won't get a dump at all.
 	 */
 	if ((sigprop(sig) & SA_CORE) && thread_single(p, SINGLE_NO_EXIT) == 0) {
 		p->p_sig = sig;
 		/*
 		 * Log signals which would cause core dumps
 		 * (Log as LOG_INFO to appease those who don't want
 		 * these messages.)
 		 * XXX : Todo, as well as euid, write out ruid too
 		 * Note that coredump() drops proc lock.
 		 */
 		if (coredump(td) == 0)
 			sig |= WCOREFLAG;
 		if (kern_logsigexit)
 			log(LOG_INFO,
 			    "pid %d (%s), uid %d: exited on signal %d%s\n",
 			    p->p_pid, p->p_comm,
 			    td->td_ucred ? td->td_ucred->cr_uid : -1,
 			    sig &~ WCOREFLAG,
 			    sig & WCOREFLAG ? " (core dumped)" : "");
 	} else
 		PROC_UNLOCK(p);
 	/* sig carries WCOREFLAG here when a core file was written. */
 	exit1(td, 0, sig);
 	/* NOTREACHED */
 }
 
 /*
  * Post SIGCHLD to the parent of `p' after the child's state changed.
  *
  * When the child has a preallocated ksiginfo (p_ksi) it is filled in with
  * the reason code, status, pid and real uid and then queued, unless it is
  * already on a signal queue.  Without one, SIGCHLD is posted with no
  * siginfo.  Both the child's and the parent's process locks must be held.
  */
 static void
 sigparent(struct proc *p, int reason, int status)
 {
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED);
 
 	if (p->p_ksi == NULL) {
 		pksignal(p->p_pptr, SIGCHLD, NULL);
 		return;
 	}
 
 	p->p_ksi->ksi_signo  = SIGCHLD;
 	p->p_ksi->ksi_code   = reason;
 	p->p_ksi->ksi_status = status;
 	p->p_ksi->ksi_pid    = p->p_pid;
 	p->p_ksi->ksi_uid    = p->p_ucred->cr_ruid;
 
 	/* Already queued: the refreshed contents are all that is needed. */
 	if (!KSI_ONQ(p->p_ksi))
 		pksignal(p->p_pptr, SIGCHLD, p->p_ksi);
 }
 
 /*
  * Report a job-control state change of child `p' to its parent: mark the
  * parent P_STATCHILD, wake it up, and send SIGCHLD unless the parent has
  * PS_NOCLDSTOP set.  Both process locks must be held.
  */
 static void
 childproc_jobstate(struct proc *p, int reason, int sig)
 {
 	struct sigacts *ps;
 	int notify;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED);
 
 	/*
 	 * Wake up a parent sleeping in kern_wait().  SIGCHLD is sent as
 	 * well, but the signal alone does not guarantee that the parent
 	 * will wake up, because the parent may have masked it.
 	 */
 	p->p_pptr->p_flag |= P_STATCHILD;
 	wakeup(p->p_pptr);
 
 	/* Sample the parent's PS_NOCLDSTOP setting under ps_mtx. */
 	ps = p->p_pptr->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	notify = ((ps->ps_flag & PS_NOCLDSTOP) == 0);
 	mtx_unlock(&ps->ps_mtx);
 
 	if (notify)
 		sigparent(p, reason, sig);
 }
 
 /*
  * Tell the parent that child `p' changed job-control state for `reason';
  * the status reported is the child's p_xsig.
  */
 void
 childproc_stopped(struct proc *p, int reason)
 {
 	int sig;
 
 	sig = p->p_xsig;
 	childproc_jobstate(p, reason, sig);
 }
 
 /*
  * Tell the parent that child `p' was continued (CLD_CONTINUED, SIGCONT).
  */
 void
 childproc_continued(struct proc *p)
 {
 
 	childproc_jobstate(p, CLD_CONTINUED, SIGCONT);
 }
 
 /*
  * Send SIGCHLD to the parent of an exiting child.  The siginfo reason is
  * CLD_DUMPED or CLD_KILLED (with the terminating signal as status) when
  * p_xsig records death by signal, otherwise CLD_EXITED with the exit code
  * from p_xexit.
  */
 void
 childproc_exited(struct proc *p)
 {
 	int reason, status;
 
 	if (WCOREDUMP(p->p_xsig) || WIFSIGNALED(p->p_xsig)) {
 		reason = WCOREDUMP(p->p_xsig) ? CLD_DUMPED : CLD_KILLED;
 		status = WTERMSIG(p->p_xsig);
 	} else {
 		reason = CLD_EXITED;
 		status = p->p_xexit;
 	}
 
 	/*
 	 * XXX avoid calling wakeup(p->p_pptr), the work is
 	 * done in exit1().
 	 */
 	sigparent(p, reason, status);
 }
 
 /*
  * We only have 1 character for the core count in the format
  * string, so the range will be 0-9
  */
 #define MAX_NUM_CORES 10
 static int num_cores = 5;
 
 /*
  * Handler for debug.ncores: reports num_cores and, on a write, stores the
  * new value clamped to the range [0, MAX_NUM_CORES].
  */
 static int
 sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS)
 {
 	int error, new_val;
 
 	new_val = num_cores;
 	error = sysctl_handle_int(oidp, &new_val, 0, req);
 	/* Stop here on failure or when nothing was written. */
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (new_val < 0)
 		new_val = 0;
 	else if (new_val > MAX_NUM_CORES)
 		new_val = MAX_NUM_CORES;
 	num_cores = new_val;
 	return (0);
 }
 SYSCTL_PROC(_debug, OID_AUTO, ncores, CTLTYPE_INT|CTLFLAG_RW,
 	    0, sizeof(int), sysctl_debug_num_cores_check, "I", "");
 
 /* Suffix appended to the corefile name when the core is compressed. */
 #define	GZ_SUFFIX	".gz"
 
 /*
  * User corefile compression.  With GZIO compiled in it is on by default
  * and, like the gzip level, exposed as a sysctl; without GZIO it is
  * permanently off and no sysctl is created.
  */
 #ifdef GZIO
 static int compress_user_cores = 1;
 SYSCTL_INT(_kern, OID_AUTO, compress_user_cores, CTLFLAG_RWTUN,
     &compress_user_cores, 0, "Compression of user corefiles");
 
 int compress_user_cores_gzlevel = 6;
 SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_gzlevel, CTLFLAG_RWTUN,
     &compress_user_cores_gzlevel, 0, "Corefile gzip compression level");
 #else
 static int compress_user_cores = 0;
 #endif
 
 /*
  * Protect the access to corefilename[] by allproc_lock.
  */
 #define	corefilename_lock	allproc_lock
 
 static char corefilename[MAXPATHLEN] = {"%N.core"};
 
 static int
 sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	sx_xlock(&corefilename_lock);
 	error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename),
 	    req);
 	sx_xunlock(&corefilename_lock);
 
 	return (error);
 }
 SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RWTUN |
     CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A",
     "Process corefile name format string");
 
 /*
  * corefile_open(comm, uid, pid, td, compress, vpp, namep)
  * Expand the name described in corefilename, using name, uid, and pid
  * and open/create core file.
  * corefilename is a printf-like string, with these format specifiers:
  *	%%	a literal '%'
  *	%H	hostname, as returned by getcredhostname() for td's credential
  *	%I	single-digit index; the first value in 0..num_cores-1 for which
  *		no file exists yet is used (the last one tried is reused if
  *		all of them exist)
  *	%N	name of process ("name")
  *	%P	process id (pid)
  *	%U	user id (uid)
  * For example, "%N.core" is the default; they can be disabled completely
  * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
  * This is controlled by the sysctl variable kern.corefile (see above).
  *
  * When `compress' is nonzero GZ_SUFFIX is appended to the expanded name.
  *
  * Returns 0 on success, with the opened vnode stored in *vpp and the
  * expanded name (allocated from M_TEMP, to be freed by the caller) in
  * *namep.  Returns ENOMEM when the expanded name does not fit in
  * MAXPATHLEN, otherwise the error from vn_open_cred(); on failure the
  * name buffer is freed here and *vpp / *namep are left untouched.
  */
 static int
 corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
     int compress, struct vnode **vpp, char **namep)
 {
 	struct nameidata nd;
 	struct sbuf sb;
 	const char *format;
 	char *hostname, *name;
 	int indexpos, i, error, cmode, flags, oflags;
 
 	hostname = NULL;
 	format = corefilename;
 	name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
 	indexpos = -1;
 	(void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
 	sx_slock(&corefilename_lock);
 	for (i = 0; format[i] != '\0'; i++) {
 		switch (format[i]) {
 		case '%':	/* Format character */
 			i++;
 			switch (format[i]) {
 			case '\0':
 				/*
 				 * A '%' at the very end of the format has
 				 * no specifier.  Step back onto the '%' so
 				 * that the loop increment lands on the
 				 * terminating NUL and the scan stops there
 				 * instead of running past the end of the
 				 * string.
 				 */
 				log(LOG_ERR,
 				    "Dangling %% at the end of "
 				    "corename `%s'\n", format);
 				i--;
 				break;
 			case '%':
 				sbuf_putc(&sb, '%');
 				break;
 			case 'H':	/* hostname */
 				if (hostname == NULL) {
 					hostname = malloc(MAXHOSTNAMELEN,
 					    M_TEMP, M_WAITOK);
 				}
 				getcredhostname(td->td_ucred, hostname,
 				    MAXHOSTNAMELEN);
 				sbuf_printf(&sb, "%s", hostname);
 				break;
 			case 'I':	/* autoincrementing index */
 				/* Placeholder digit, patched below. */
 				sbuf_printf(&sb, "0");
 				indexpos = sbuf_len(&sb) - 1;
 				break;
 			case 'N':	/* process name */
 				sbuf_printf(&sb, "%s", comm);
 				break;
 			case 'P':	/* process id */
 				sbuf_printf(&sb, "%u", pid);
 				break;
 			case 'U':	/* user id */
 				sbuf_printf(&sb, "%u", uid);
 				break;
 			default:
 				log(LOG_ERR,
 				    "Unknown format character %c in "
 				    "corename `%s'\n", format[i], format);
 				break;
 			}
 			break;
 		default:
 			sbuf_putc(&sb, format[i]);
 			break;
 		}
 	}
 	sx_sunlock(&corefilename_lock);
 	free(hostname, M_TEMP);
 	if (compress)
 		sbuf_printf(&sb, GZ_SUFFIX);
 	/* The sbuf is fixed-length; an error here means the name overflowed. */
 	if (sbuf_error(&sb) != 0) {
 		log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
 		    "long\n", (long)pid, comm, (u_long)uid);
 		sbuf_delete(&sb);
 		free(name, M_TEMP);
 		return (ENOMEM);
 	}
 	sbuf_finish(&sb);
 	sbuf_delete(&sb);
 
 	cmode = S_IRUSR | S_IWUSR;
 	oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
 	    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
 
 	/*
 	 * If the core format has a %I in it, then we need to check
 	 * for existing corefiles before returning a name.
 	 * To do this we iterate over 0..num_cores to find a
 	 * non-existing core file name to use.
 	 */
 	if (indexpos != -1) {
 		for (i = 0; i < num_cores; i++) {
 			flags = O_CREAT | O_EXCL | FWRITE | O_NOFOLLOW;
 			name[indexpos] = '0' + i;
 			NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
 			error = vn_open_cred(&nd, &flags, cmode, oflags,
 			    td->td_ucred, NULL);
 			if (error) {
 				/* Slot taken: try the next index. */
 				if (error == EEXIST)
 					continue;
 				log(LOG_ERR,
 				    "pid %d (%s), uid (%u):  Path `%s' failed "
 				    "on initial open test, error = %d\n",
 				    pid, comm, uid, name, error);
 			}
 			goto out;
 		}
 	}
 
 	/*
 	 * No %I in the format, or every indexed name already exists:
 	 * open (creating if needed) without O_EXCL.
 	 */
 	flags = O_CREAT | FWRITE | O_NOFOLLOW;
 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td);
 	error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL);
 out:
 	if (error) {
 #ifdef AUDIT
 		audit_proc_coredump(td, name, error);
 #endif
 		free(name, M_TEMP);
 		return (error);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	*vpp = nd.ni_vp;
 	*namep = name;
 	return (0);
 }
 
 /*
  * Check that `path' consists solely of letters, digits, '/', '.' and '-'.
  *
  * Only this subset of ASCII is sent to devd(8) because it might pass
  * these strings to sh -c.  Returns 1 when every character is acceptable
  * (including for the empty string) and 0 as soon as another character
  * is found.
  */
 static int
 coredump_sanitise_path(const char *path)
 {
 	const char *cp;
 
 	for (cp = path; *cp; cp++) {
 		if (isalpha(*cp) || isdigit(*cp))
 			continue;
 		if (*cp == '/' || *cp == '.' || *cp == '-')
 			continue;
 		return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Dump a process' core.  The main routine does some
  * policy checking, and creates the name of the coredump;
  * then it passes on a vnode and a size limit to the process-specific
  * coredump routine if there is one; if there _is not_ one, it returns
  * ENOSYS; otherwise it returns the error from the process-specific routine.
  *
  * Must be entered with the process lock held; the lock is dropped on
  * every path before returning.  Other return values produced here:
  * EFAULT when dumping is refused by policy or the target file is not
  * acceptable, EFBIG when the core size limit or the RACCT_CORE allowance
  * is zero, and any error from corefile_open() or from closing the file.
  */
 
 static int
 coredump(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *cred = td->td_ucred;
 	struct vnode *vp;
 	struct flock lf;
 	struct vattr vattr;
 	int error, error1, locked;
 	char *name;			/* name of corefile */
 	void *rl_cookie;
 	off_t limit;
 	char *data = NULL;
 	char *fullpath, *freepath = NULL;
 	size_t len;
 	static const char comm_name[] = "comm=";
 	static const char core_name[] = "core=";
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
 	_STOPEVENT(p, S_CORE, 0);
 
 	/*
 	 * Policy: refuse when core dumps are disabled globally, when the
 	 * process is P_SUGID and sugid dumps are not allowed, or when the
 	 * process is flagged P2_NOTRACE.
 	 */
 	if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) ||
 	    (p->p_flag2 & P2_NOTRACE) != 0) {
 		PROC_UNLOCK(p);
 		return (EFAULT);
 	}
 
 	/*
 	 * Note that the bulk of limit checking is done after
 	 * the corefile is created.  The exception is if the limit
 	 * for corefiles is 0, in which case we don't bother
 	 * creating the corefile at all.  This layout means that
 	 * a corefile is truncated instead of not being created,
 	 * if it is larger than the limit.
 	 */
 	limit = (off_t)lim_cur(td, RLIMIT_CORE);
 	if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
 		PROC_UNLOCK(p);
 		return (EFBIG);
 	}
 	PROC_UNLOCK(p);
 
 	/* On success `name' is owned by us and freed at `out'. */
 	error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
 	    compress_user_cores, &vp, &name);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Don't dump to non-regular files or files with links.
 	 * Do not dump into system files.
 	 */
 	if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
 	    vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0) {
 		VOP_UNLOCK(vp, 0);
 		error = EFAULT;
 		goto out;
 	}
 
 	VOP_UNLOCK(vp, 0);
 
 	/* Postpone other writers, including core dumps of other processes. */
 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 
 	/*
 	 * Also try to take an advisory write lock on the whole file.
 	 * Failure is tolerated; `locked' only records whether there is a
 	 * lock to release afterwards.
 	 */
 	lf.l_whence = SEEK_SET;
 	lf.l_start = 0;
 	lf.l_len = 0;
 	lf.l_type = F_WRLCK;
 	locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
 
 	/*
 	 * Truncate the file to zero length and, if configured, mark it
 	 * UF_NODUMP.  The result of VOP_SETATTR() is not checked.
 	 */
 	VATTR_NULL(&vattr);
 	vattr.va_size = 0;
 	if (set_core_nodump_flag)
 		vattr.va_flags = UF_NODUMP;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VOP_SETATTR(vp, &vattr, cred);
 	VOP_UNLOCK(vp, 0);
 	/* Record in the accounting flags that a core was attempted. */
 	PROC_LOCK(p);
 	p->p_acflag |= ACORE;
 	PROC_UNLOCK(p);
 
 	/* Hand the vnode to the ABI-specific dump routine, if any. */
 	if (p->p_sysent->sv_coredump != NULL) {
 		error = p->p_sysent->sv_coredump(td, vp, limit,
 		    compress_user_cores ? IMGACT_CORE_COMPRESS : 0);
 	} else {
 		error = ENOSYS;
 	}
 
 	if (locked) {
 		lf.l_type = F_UNLCK;
 		VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
 	}
 	vn_rangelock_unlock(vp, rl_cookie);
 
 	/*
 	 * Notify the userland helper that a process triggered a core dump.
 	 * This allows the helper to run an automated debugging session.
 	 * The notification is skipped when the dump failed, when it is
 	 * disabled (coredump_devctl == 0), when either path cannot be
 	 * resolved, or when a path contains characters rejected by
 	 * coredump_sanitise_path().
 	 */
 	if (error != 0 || coredump_devctl == 0)
 		goto out;
 	/*
 	 * Room for "comm=<path> core=<path>".  NOTE(review): in C a
 	 * character constant has type int, so sizeof(' ') reserves
 	 * sizeof(int) bytes for the separator rather than one.
 	 */
 	len = MAXPATHLEN * 2 + sizeof(comm_name) - 1 +
 	    sizeof(' ') + sizeof(core_name) - 1;
 	data = malloc(len, M_TEMP, M_WAITOK);
 	if (vn_fullpath_global(td, p->p_textvp, &fullpath, &freepath) != 0)
 		goto out;
 	if (!coredump_sanitise_path(fullpath))
 		goto out;
 	snprintf(data, len, "%s%s ", comm_name, fullpath);
 	/* Reset freepath so the second lookup cannot cause a double free. */
 	free(freepath, M_TEMP);
 	freepath = NULL;
 	if (vn_fullpath_global(td, vp, &fullpath, &freepath) != 0)
 		goto out;
 	if (!coredump_sanitise_path(fullpath))
 		goto out;
 	strlcat(data, core_name, len);
 	strlcat(data, fullpath, len);
 	devctl_notify("kernel", "signal", "coredump", data);
 out:
 	/* A close failure is reported only if nothing failed earlier. */
 	error1 = vn_close(vp, FWRITE, cred, td);
 	if (error == 0)
 		error = error1;
 #ifdef AUDIT
 	audit_proc_coredump(td, name, error);
 #endif
 	free(freepath, M_TEMP);
 	free(data, M_TEMP);
 	free(name, M_TEMP);
 	return (error);
 }
 
 /*
  * Handler for a nonexistent system call: post SIGSYS to the calling
  * thread (the process may want to handle it) and return ENOSYS as well,
  * in case the process won't see the signal immediately (blocked or
  * ignored).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct nosys_args {
 	int	dummy;
 };
 #endif
 /* ARGSUSED */
 int
 nosys(struct thread *td, struct nosys_args *args)
 {
 	struct proc *p;
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	tdsignal(td, SIGSYS);
 	PROC_UNLOCK(p);
 	return (ENOSYS);
 }
 
 /*
  * Send a SIGIO or SIGURG signal to a process or process group using stored
  * credentials rather than those of the current process.
  *
  * *sigiop is read under the SIGIO lock; nothing is done when it is NULL.
  * A positive sio_pgid selects the single process sio_proc, a negative
  * one selects every member of the process group sio_pgrp.  A target is
  * signalled only if CANSIGIO() allows it for the stored credential; group
  * members must also be in state PRS_NORMAL and, when `checkctty' is
  * nonzero, have P_CONTROLT set.
  */
 void
 pgsigio(struct sigio **sigiop, int sig, int checkctty)
 {
 	struct sigio *sigio;
 	struct proc *p;
 
 	SIGIO_LOCK();
 	sigio = *sigiop;
 	if (sigio == NULL) {
 		SIGIO_UNLOCK();
 		return;
 	}
 	if (sigio->sio_pgid > 0) {
 		PROC_LOCK(sigio->sio_proc);
 		if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred))
 			kern_psignal(sigio->sio_proc, sig);
 		PROC_UNLOCK(sigio->sio_proc);
 	} else if (sigio->sio_pgid < 0) {
 		PGRP_LOCK(sigio->sio_pgrp);
 		LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NORMAL &&
 			    CANSIGIO(sigio->sio_ucred, p->p_ucred) &&
 			    (checkctty == 0 || (p->p_flag & P_CONTROLT)))
 				kern_psignal(p, sig);
 			PROC_UNLOCK(p);
 		}
 		PGRP_UNLOCK(sigio->sio_pgrp);
 	}
 	SIGIO_UNLOCK();
 }
 
 /*
  * Attach a signal knote to the current process: remember the process in
  * the knote, force EV_CLEAR, and link the knote onto the process' knote
  * list.  Always returns 0.
  */
 static int
 filt_sigattach(struct knote *kn)
 {
 	struct proc *p;
 
 	p = curproc;
 	kn->kn_ptr.p_proc = p;
 	/* EV_CLEAR is set automatically for signal knotes. */
 	kn->kn_flags |= EV_CLEAR;
 	knlist_add(&p->p_klist, kn, 0);
 	return (0);
 }
 
 /*
  * Detach a signal knote: unlink it from the knote list of the process
  * recorded in the knote.
  */
 static void
 filt_sigdetach(struct knote *kn)
 {
 	struct proc *p;
 
 	p = kn->kn_ptr.p_proc;
 	knlist_remove(&p->p_klist, kn, 0);
 }
 
 /*
  * signal knotes are shared with proc knotes, so we apply a mask to
  * the hint in order to differentiate them from process hints.  This
  * could be avoided by using a signal-specific knote list, but probably
  * isn't worth the trouble.
  *
  * A hint carrying NOTE_SIGNAL whose remaining bits equal the knote's
  * identifier bumps kn_data; the filter reports active whenever kn_data
  * is nonzero.
  */
 static int
 filt_signal(struct knote *kn, long hint)
 {
 
 	if ((hint & NOTE_SIGNAL) != 0 &&
 	    kn->kn_id == (hint & ~NOTE_SIGNAL))
 		kn->kn_data++;
 	return (kn->kn_data != 0);
 }
 
 /*
  * Allocate a zeroed sigacts structure holding a single reference, with
  * its mutex initialized.  The allocation uses M_WAITOK.
  */
 struct sigacts *
 sigacts_alloc(void)
 {
 	struct sigacts *ps;
 
 	ps = malloc(sizeof(*ps), M_SUBPROC, M_WAITOK | M_ZERO);
 	refcount_init(&ps->ps_refcnt, 1);
 	mtx_init(&ps->ps_mtx, "sigacts", NULL, MTX_DEF);
 	return (ps);
 }
 
 /*
  * Drop one reference on `ps'; when refcount_release() reports that it was
  * the last one, destroy the mutex and free the structure.
  */
 void
 sigacts_free(struct sigacts *ps)
 {
 
 	if (refcount_release(&ps->ps_refcnt) != 0) {
 		mtx_destroy(&ps->ps_mtx);
 		free(ps, M_SUBPROC);
 	}
 }
 
 /*
  * Take an additional reference on `ps' and return it.
  */
 struct sigacts *
 sigacts_hold(struct sigacts *ps)
 {
 
 	refcount_acquire(&ps->ps_refcnt);
 
 	return (ps);
 }
 
 /*
  * Copy the contents of `src' that precede ps_refcnt into `dest', with the
  * source mutex held for the duration of the copy.  `dest' must not be
  * shared (asserted: its reference count is 1).
  */
 void
 sigacts_copy(struct sigacts *dest, struct sigacts *src)
 {
 
 	KASSERT(dest->ps_refcnt == 1, ("sigacts_copy to shared dest"));
 
 	mtx_lock(&src->ps_mtx);
 	bcopy(src, dest, offsetof(struct sigacts, ps_refcnt));
 	mtx_unlock(&src->ps_mtx);
 }
 
 /*
  * Report whether `ps' has more than one reference.
  */
 int
 sigacts_shared(struct sigacts *ps)
 {
 	int shared;
 
 	shared = (ps->ps_refcnt > 1);
 	return (shared);
 }
Index: head/sys/kern/kern_timeout.c
===================================================================
--- head/sys/kern/kern_timeout.c	(revision 292383)
+++ head/sys/kern/kern_timeout.c	(revision 292384)
@@ -1,1619 +1,1617 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)kern_clock.c	8.5 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_callout_profiling.h"
 #if defined(__arm__)
 #include "opt_timer.h"
 #endif
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/callout.h>
 #include <sys/file.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/sleepqueue.h>
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
 #ifdef SMP
 #include <machine/cpu.h>
 #endif
 
 #ifndef NO_EVENTTIMERS
 DPCPU_DECLARE(sbintime_t, hardclocktime);
 #endif
 
 SDT_PROVIDER_DEFINE(callout_execute);
-SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start,
-    "struct callout *");
-SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end,
-    "struct callout *");
+SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *");
+SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *");
 
 /*
  * Read-only statistics, compiled in only with CALLOUT_PROFILING.  The
  * *_dir counters are updated by callout_process() as running averages
  * scaled by 1000.
  */
 #ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
     "Average number of items examined per softclock call. Units = 1/1000");
 static int avg_gcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
     "Average number of Giant callouts made per softclock call. Units = 1/1000");
 static int avg_lockcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
     "Average number of lock callouts made per softclock call. Units = 1/1000");
 static int avg_mpcalls;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
     "Average number of MP callouts made per softclock call. Units = 1/1000");
 static int avg_depth_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
     "Average number of direct callouts examined per callout_process call. "
     "Units = 1/1000");
 static int avg_lockcalls_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
     &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
     "callout_process call. Units = 1/1000");
 static int avg_mpcalls_dir;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
     0, "Average number of MP direct callouts made per callout_process call. "
     "Units = 1/1000");
 #endif
 
 /*
  * Number of preallocated timeout() callouts.  The sysctl is NOFETCH: the
  * value is computed and the tunable fetched in callout_callwheel_init().
  */
 static int ncallout;
 SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0,
     "Number of entries in callwheel and size of timeout() preallocation");
 
 /*
  * Whether the softclock swi threads are bound to their CPUs.  Both
  * default to on when the kernel is built with RSS and to off otherwise.
  * The sysctls are NOFETCH: the tunables are fetched by
  * callout_callwheel_init().
  */
 #ifdef	RSS
 static int pin_default_swi = 1;
 static int pin_pcpu_swi = 1;
 #else
 static int pin_default_swi = 0;
 static int pin_pcpu_swi = 0;
 #endif
 
 SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi,
     0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)");
 SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi,
     0, "Pin the per-CPU swis (except PCPU 0, which is also default)");
 
 /*
  * TODO:
  *	allocate more timeout table slots when table overflows.
  *
  * callwheelsize is the number of buckets in each callwheel, a power of
  * two computed in callout_callwheel_init(); callwheelmask is
  * callwheelsize - 1 and reduces a hash to a bucket index.
  */
 u_int callwheelsize, callwheelmask;
 
 /*
  * The callout cpu exec entities hold the information needed to describe
  * the state of the callouts currently running on the CPU and to migrate
  * callouts to a new callout cpu.  In particular, the first entry of the
  * cc_exec_entity array holds the information for the callout running in
  * SWI thread context, while the second one holds the information for the
  * callout running directly from hardware interrupt context.
  * The cached information is very important for deferring migration when
  * the migrating callout is already running.
  */
 struct cc_exec {
 	/* Callout currently in progress; NULL when none is. */
 	struct callout		*cc_curr;
 	/* NOTE(review): presumably a drain-completion hook -- confirm. */
 	void			(*cc_drain)(void *);
 #ifdef SMP
 	/*
 	 * Deferred migration request.  ce_migration_cpu == CPUBLOCK means
 	 * that no migration is pending (see cc_cce_migrating()).
 	 */
 	void			(*ce_migration_func)(void *);
 	void			*ce_migration_arg;
 	int			ce_migration_cpu;
 	sbintime_t		ce_migration_time;
 	sbintime_t		ce_migration_prec;
 #endif
 	/* See the "Locked by cc_lock" comment below for these two. */
 	bool			cc_cancel;
 	bool			cc_waiting;
 };
 
 /*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
  */
 struct callout_cpu {
 	/* Spin mutex protecting this structure (see CC_LOCK()). */
 	struct mtx_padalign	cc_lock;
 	/* [0]: SWI thread context, [1]: direct (interrupt) context. */
 	struct cc_exec 		cc_exec_entity[2];
 	/* Next callout to visit while callout_process() runs one directly. */
 	struct callout		*cc_next;
 	/* Preallocated timeout(9) callouts; NULL except on timeout_cpu. */
 	struct callout		*cc_callout;
 	/* Array of callwheelsize bucket lists. */
 	struct callout_list	*cc_callwheel;
 	/* Expired callouts queued for the softclock swi. */
 	struct callout_tailq	cc_expireq;
 	/* Free list built from the cc_callout preallocation. */
 	struct callout_slist	cc_callfree;
 	/* Set by callout_process() to the computed next event time. */
 	sbintime_t		cc_firstevent;
 	/* Time passed to the most recent callout_process() scan. */
 	sbintime_t		cc_lastscan;
 	/* Cookie returned by swi_add(), used with swi_sched(). */
 	void			*cc_cookie;
 	/* Bucket being walked when a direct callout is executed. */
 	u_int			cc_bucket;
 	/* Set to 1 by callout_cpu_init(). */
 	u_int			cc_inited;
 	/* "callwheel cpu N", formatted by callout_cpu_init(). */
 	char			cc_ktr_event_name[20];
 };
 
 /* Tests the CALLOUT_DFRMIGRATION flag of callout `c'. */
 #define	callout_migrating(c)	((c)->c_iflags & CALLOUT_DFRMIGRATION)
 
 /*
  * Accessors for the per-context execution state of a callout cpu; `dir'
  * indexes cc_exec_entity[].  Arguments and expansions are parenthesized
  * so that the macros are safe with arbitrary argument expressions and in
  * any surrounding expression; they remain usable as lvalues.
  */
 #define	cc_exec_curr(cc, dir)		((cc)->cc_exec_entity[(dir)].cc_curr)
 #define	cc_exec_drain(cc, dir)		((cc)->cc_exec_entity[(dir)].cc_drain)
 #define	cc_exec_next(cc)		((cc)->cc_next)
 #define	cc_exec_cancel(cc, dir)		((cc)->cc_exec_entity[(dir)].cc_cancel)
 #define	cc_exec_waiting(cc, dir)	((cc)->cc_exec_entity[(dir)].cc_waiting)
 #ifdef SMP
 #define	cc_migration_func(cc, dir)	((cc)->cc_exec_entity[(dir)].ce_migration_func)
 #define	cc_migration_arg(cc, dir)	((cc)->cc_exec_entity[(dir)].ce_migration_arg)
 #define	cc_migration_cpu(cc, dir)	((cc)->cc_exec_entity[(dir)].ce_migration_cpu)
 #define	cc_migration_time(cc, dir)	((cc)->cc_exec_entity[(dir)].ce_migration_time)
 #define	cc_migration_prec(cc, dir)	((cc)->cc_exec_entity[(dir)].ce_migration_prec)
 
 /*
  * One callout_cpu per possible CPU.  CPUBLOCK (== MAXCPU, never a valid
  * index) marks "no cpu": a callout being switched between cpus, or no
  * migration pending.
  */
 struct callout_cpu cc_cpu[MAXCPU];
 #define	CPUBLOCK	MAXCPU
 #define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
 #define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
 #else
 /* Uniprocessor: a single callout_cpu regardless of the cpu argument. */
 struct callout_cpu cc_cpu;
 #define	CC_CPU(cpu)	(&cc_cpu)
 #define	CC_SELF()	(&cc_cpu)
 #endif
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
 #define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int timeout_cpu;
 
 static void	callout_cpu_init(struct callout_cpu *cc, int cpu);
 static void	softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
 		    int *mpcalls, int *lockcalls, int *gcalls,
 #endif
 		    int direct);
 
 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
 /**
  * Locked by cc_lock:
  *   cc_curr         - If a callout is in progress, it is cc_curr.
  *                     If cc_curr is non-NULL, threads waiting in
  *                     callout_drain() will be woken up as soon as the
  *                     relevant callout completes.
  *   cc_cancel       - Changing to 1 with both callout_lock and cc_lock held
  *                     guarantees that the current callout will not run.
  *                     The softclock() function sets this to 0 before it
  *                     drops callout_lock to acquire c_lock, and it calls
  *                     the handler only if cc_cancel is still 0 after
  *                     cc_lock is successfully acquired.
  *   cc_waiting      - If a thread is waiting in callout_drain(), then
  *                     cc_waiting is true.  Set only when
  *                     cc_curr is non-NULL.
  */
 
 /*
  * Reset one execution entity of a callout cpu to its idle state: no
  * current callout, not cancelled, nobody waiting and, on SMP, no
  * migration pending.  `direct' selects the entity.
  */
 static void
 cc_cce_cleanup(struct callout_cpu *cc, int direct)
 {
 
 #ifdef SMP
 	/* CPUBLOCK as the target cpu means "no migration requested". */
 	cc_migration_cpu(cc, direct) = CPUBLOCK;
 	cc_migration_func(cc, direct) = NULL;
 	cc_migration_arg(cc, direct) = NULL;
 	cc_migration_time(cc, direct) = 0;
 	cc_migration_prec(cc, direct) = 0;
 #endif
 	cc_exec_waiting(cc, direct) = false;
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_curr(cc, direct) = NULL;
 }
 
 /*
  * Report whether a migration has been requested for the given execution
  * entity of a callout cpu.  Always 0 on non-SMP kernels.
  */
 static int
 cc_cce_migrating(struct callout_cpu *cc, int direct)
 {
 	int migrating;
 
 #ifdef SMP
 	migrating = (cc_migration_cpu(cc, direct) != CPUBLOCK);
 #else
 	migrating = 0;
 #endif
 	return (migrating);
 }
 
 /*
  * Kernel low level callwheel initialization
  * called on cpu0 during kernel startup.
  *
  * Zeroes the callout cpu state, sizes the callwheel from ncallout,
  * fetches the related tunables and initializes the callout cpu of the
  * CPU this runs on, which becomes timeout_cpu.  Runs as a SYSINIT at
  * SI_SUB_CPU.
  */
 static void
 callout_callwheel_init(void *dummy)
 {
 	struct callout_cpu *cc;
 
 	/*
 	 * Calculate the size of the callout wheel and the preallocated
 	 * timeout() structures.
 	 * XXX: Clip callout to result of previous function of maxusers
 	 * maximum 384.  This is still huge, but acceptable.
 	 */
 	memset(CC_CPU(0), 0, sizeof(cc_cpu));
 	/* Computed default first; the tunable, if set, overrides it. */
 	ncallout = imin(16 + maxproc + maxfiles, 18508);
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
 
 	/*
 	 * Calculate callout wheel size, should be next power of two higher
 	 * than 'ncallout'.
 	 */
 	callwheelsize = 1 << fls(ncallout);
 	callwheelmask = callwheelsize - 1;
 
 	/*
 	 * Fetch whether we're pinning the swi's or not.
 	 */
 	TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi);
 	TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi);
 
 	/*
 	 * Only cpu0 handles timeout(9) and receives a preallocation.
 	 *
 	 * XXX: Once all timeout(9) consumers are converted this can
 	 * be removed.
 	 */
 	timeout_cpu = PCPU_GET(cpuid);
 	cc = CC_CPU(timeout_cpu);
 	/* A non-NULL cc_callout makes callout_cpu_init() build the free list. */
 	cc->cc_callout = malloc(ncallout * sizeof(struct callout),
 	    M_CALLOUT, M_WAITOK);
 	callout_cpu_init(cc, timeout_cpu);
 }
 SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL);
 
 /*
  * Initialize the per-cpu callout structures.
  */
 static void
 callout_cpu_init(struct callout_cpu *cc, int cpu)
 {
 	struct callout *c;
 	int i;
 
 	mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
 	SLIST_INIT(&cc->cc_callfree);
 	cc->cc_inited = 1;
 	cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
 	    M_CALLOUT, M_WAITOK);
 	for (i = 0; i < callwheelsize; i++)
 		LIST_INIT(&cc->cc_callwheel[i]);
 	TAILQ_INIT(&cc->cc_expireq);
 	cc->cc_firstevent = SBT_MAX;
 	for (i = 0; i < 2; i++)
 		cc_cce_cleanup(cc, i);
 	snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
 	    "callwheel cpu %d", cpu);
 	if (cc->cc_callout == NULL)	/* Only cpu0 handles timeout(9) */
 		return;
 	for (i = 0; i < ncallout; i++) {
 		c = &cc->cc_callout[i];
 		callout_init(c, 0);
 		c->c_iflags = CALLOUT_LOCAL_ALLOC;
 		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 	}
 }
 
 #ifdef SMP
 /*
  * Switches the cpu tied to a specific callout.
  * The function expects a locked incoming callout cpu and returns with
  * locked outcoming callout cpu.
  *
  * While neither lock is held the callout's c_cpu is CPUBLOCK, which
  * makes callout_lock() spin until the new cpu has been published.
  */
 static struct callout_cpu *
 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
 {
 	struct callout_cpu *new_cc;
 
 	MPASS(c != NULL && cc != NULL);
 	CC_LOCK_ASSERT(cc);
 
 	/*
 	 * Avoid interrupts and preemption firing after the callout cpu
 	 * is blocked in order to avoid deadlocks as the new thread
 	 * may be willing to acquire the callout cpu lock.
 	 */
 	c->c_cpu = CPUBLOCK;
 	spinlock_enter();
 	CC_UNLOCK(cc);
 	new_cc = CC_CPU(new_cpu);
 	CC_LOCK(new_cc);
 	spinlock_exit();
 	/* Publish the new cpu only once its lock is held. */
 	c->c_cpu = new_cpu;
 	return (new_cc);
 }
 #endif
 
 /*
  * Start the softclock threads: the default one for timeout_cpu,
  * registered on clk_intr_event, and on SMP one more per remaining CPU,
  * each with its own interrupt event and freshly initialized callout cpu.
  * Threads are bound to their CPU when pin_default_swi / pin_pcpu_swi is
  * set; a binding failure is only reported, while failure to create a swi
  * panics.  Runs as a SYSINIT at SI_SUB_SOFTINTR.
  */
 static void
 start_softclock(void *dummy)
 {
 	struct callout_cpu *cc;
 	char name[MAXCOMLEN];
 #ifdef SMP
 	int cpu;
 	struct intr_event *ie;
 #endif
 
 	cc = CC_CPU(timeout_cpu);
 	snprintf(name, sizeof(name), "clock (%d)", timeout_cpu);
 	if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK,
 	    INTR_MPSAFE, &cc->cc_cookie) != 0)
 		panic("died while creating standard software ithreads");
 	if (pin_default_swi) {
 		if (intr_event_bind(clk_intr_event, timeout_cpu) != 0) {
 			printf("%s: timeout clock couldn't be pinned to cpu %d\n",
 			    __func__,
 			    timeout_cpu);
 		}
 	}
 
 #ifdef SMP
 	CPU_FOREACH(cpu) {
 		/* timeout_cpu was set up above and at callwheel init. */
 		if (cpu == timeout_cpu)
 			continue;
 		cc = CC_CPU(cpu);
 		cc->cc_callout = NULL;	/* Only cpu0 handles timeout(9). */
 		callout_cpu_init(cc, cpu);
 		snprintf(name, sizeof(name), "clock (%d)", cpu);
 		ie = NULL;
 		if (swi_add(&ie, name, softclock, cc, SWI_CLOCK,
 		    INTR_MPSAFE, &cc->cc_cookie) != 0)
 			panic("died while creating standard software ithreads");
 		if (pin_pcpu_swi) {
 			if (intr_event_bind(ie, cpu) != 0) {
 				printf("%s: per-cpu clock couldn't be pinned to "
 				    "cpu %d\n",
 				    __func__,
 				    cpu);
 			}
 		}
 	}
 #endif
 }
 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
 
 #define	CC_HASH_SHIFT	8
 
 /*
  * Hash a time value: shift it right by (32 - CC_HASH_SHIFT) bits and
  * truncate the result to u_int.
  */
 static inline u_int
 callout_hash(sbintime_t sbt)
 {
 
 	return ((u_int)(sbt >> (32 - CC_HASH_SHIFT)));
 }
 
 /*
  * Map a time value to its callwheel bucket index: the hash of the time
  * reduced with callwheelmask.
  */
 static inline u_int
 callout_get_bucket(sbintime_t sbt)
 {
 	u_int hash;
 
 	hash = callout_hash(sbt);
 	return (hash & callwheelmask);
 }
 
 /*
  * Scan the current CPU's callwheel at time `now'.
  *
  * Buckets are walked from the one of the previous scan up to a lookahead
  * bucket.  Every callout whose time has come is either run on the spot
  * (CALLOUT_DIRECT) through softclock_call_cc() or moved to cc_expireq
  * for the softclock swi.  Pending callouts inside the lookahead window
  * narrow [first, last], the interval handed to cpu_new_callout() for the
  * next event; `last' is also stored in cc_firstevent.  The wheel is
  * scanned under cc_lock; the swi is scheduled after the lock is dropped.
  */
 void
 callout_process(sbintime_t now)
 {
 	struct callout *tmp, *tmpn;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t first, last, max, tmp_max;
 	uint32_t lookahead;
 	u_int firstb, lastb, nowb;
 #ifdef CALLOUT_PROFILING
 	int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
 #endif
 
 	cc = CC_SELF();
 	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
 
 	/* Compute the buckets of the last scan and present times. */
 	firstb = callout_hash(cc->cc_lastscan);
 	cc->cc_lastscan = now;
 	nowb = callout_hash(now);
 
 	/* Compute the last bucket and minimum time of the bucket after it. */
 	/* The further behind the previous scan is, the larger the lookahead. */
 	if (nowb == firstb)
 		lookahead = (SBT_1S / 16);
 	else if (nowb - firstb == 1)
 		lookahead = (SBT_1S / 8);
 	else
 		lookahead = (SBT_1S / 2);
 	first = last = now;
 	first += (lookahead / 2);
 	last += lookahead;
 	/* Round `last' down to the start of its bucket. */
 	last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
 	lastb = callout_hash(last) - 1;
 	max = last;
 
 	/*
 	 * Check if we wrapped around the entire wheel from the last scan.
 	 * In case, we need to scan entirely the wheel for pending callouts.
 	 */
 	if (lastb - firstb >= callwheelsize) {
 		lastb = firstb + callwheelsize - 1;
 		if (nowb - firstb >= callwheelsize)
 			nowb = lastb;
 	}
 
 	/* Iterate callwheel from firstb to nowb and then up to lastb. */
 	do {
 		sc = &cc->cc_callwheel[firstb & callwheelmask];
 		tmp = LIST_FIRST(sc);
 		while (tmp != NULL) {
 			/* Run the callout if present time within allowed. */
 			if (tmp->c_time <= now) {
 				/*
 				 * Consumer told us the callout may be run
 				 * directly from hardware interrupt context.
 				 */
 				if (tmp->c_iflags & CALLOUT_DIRECT) {
 #ifdef CALLOUT_PROFILING
 					++depth_dir;
 #endif
 					/*
 					 * Record the successor in cc_next
 					 * and resume from whatever it holds
 					 * once the handler has run.
 					 */
 					cc_exec_next(cc) =
 					    LIST_NEXT(tmp, c_links.le);
 					cc->cc_bucket = firstb & callwheelmask;
 					LIST_REMOVE(tmp, c_links.le);
 					softclock_call_cc(tmp, cc,
 #ifdef CALLOUT_PROFILING
 					    &mpcalls_dir, &lockcalls_dir, NULL,
 #endif
 					    1);
 					tmp = cc_exec_next(cc);
 					cc_exec_next(cc) = NULL;
 				} else {
 					/* Defer to the softclock swi. */
 					tmpn = LIST_NEXT(tmp, c_links.le);
 					LIST_REMOVE(tmp, c_links.le);
 					TAILQ_INSERT_TAIL(&cc->cc_expireq,
 					    tmp, c_links.tqe);
 					tmp->c_iflags |= CALLOUT_PROCESSED;
 					tmp = tmpn;
 				}
 				continue;
 			}
 			/* Skip events from distant future. */
 			if (tmp->c_time >= max)
 				goto next;
 			/*
 			 * Event minimal time is bigger than present maximal
 			 * time, so it cannot be aggregated.
 			 */
 			if (tmp->c_time > last) {
 				lastb = nowb;
 				goto next;
 			}
 			/* Update first and last time, respecting this event. */
 			if (tmp->c_time < first)
 				first = tmp->c_time;
 			tmp_max = tmp->c_time + tmp->c_precision;
 			if (tmp_max < last)
 				last = tmp_max;
 next:
 			tmp = LIST_NEXT(tmp, c_links.le);
 		}
 		/* Proceed with the next bucket. */
 		firstb++;
 		/*
 		 * Stop if we looked after present time and found
 		 * some event we can't execute at now.
 		 * Stop if we looked far enough into the future.
 		 */
 	} while (((int)(firstb - lastb)) <= 0);
 	cc->cc_firstevent = last;
 #ifndef NO_EVENTTIMERS
 	cpu_new_callout(curcpu, last, first);
 #endif
 #ifdef CALLOUT_PROFILING
 	/* Running averages, scaled by 1000. */
 	avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
 	avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
 	avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
 #endif
 	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 	/*
 	 * swi_sched acquires the thread lock, so we don't want to call it
 	 * with cc_lock held; incorrect locking order.
 	 */
 	if (!TAILQ_EMPTY(&cc->cc_expireq))
 		swi_sched(cc->cc_cookie, 0);
 }
 
 /*
  * Lock and return the callout_cpu that currently owns callout 'c'.
  *
  * c->c_cpu is sampled before the lock is taken and re-read afterwards; if
  * the two values differ the lock is dropped and the lookup is retried.  On
  * SMP, while c->c_cpu reads CPUBLOCK the function spins until it changes.
  */
 static struct callout_cpu *
 callout_lock(struct callout *c)
 {
 	struct callout_cpu *owner;
 	int seen_cpu;
 
 	for (;;) {
 		seen_cpu = c->c_cpu;
 #ifdef SMP
 		if (seen_cpu == CPUBLOCK) {
 			while (c->c_cpu == CPUBLOCK)
 				cpu_spinwait();
 			continue;
 		}
 #endif
 		owner = CC_CPU(seen_cpu);
 		CC_LOCK(owner);
 		/* Still owned by the CPU we locked?  Then we are done. */
 		if (c->c_cpu == seen_cpu)
 			return (owner);
 		CC_UNLOCK(owner);
 	}
 }
 
 /*
  * Insert callout 'c' into the callwheel of 'cc' so that 'func(arg)' runs at
  * time 'sbt' with the given precision.  The caller must hold the cc lock
  * (asserted).  A time earlier than cc->cc_lastscan is raised to it.
  */
 static void
 callout_cc_add(struct callout *c, struct callout_cpu *cc,
     sbintime_t sbt, sbintime_t precision, void (*func)(void *),
     void *arg, int cpu, int flags)
 {
 	int slot;
 
 	CC_LOCK_ASSERT(cc);
 
 	/* Never schedule earlier than the last wheel scan. */
 	if (sbt < cc->cc_lastscan)
 		sbt = cc->cc_lastscan;
 
 	/* Record what to run and when. */
 	c->c_arg = arg;
 	c->c_func = func;
 	c->c_time = sbt;
 	c->c_precision = precision;
 
 	/* Mark the callout pending and active again. */
 	c->c_iflags |= CALLOUT_PENDING;
 	c->c_iflags &= ~CALLOUT_PROCESSED;
 	c->c_flags |= CALLOUT_ACTIVE;
 	if ((flags & C_DIRECT_EXEC) != 0)
 		c->c_iflags |= CALLOUT_DIRECT;
 
 	slot = callout_get_bucket(c->c_time);
 	CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
 	    c, (int)(c->c_precision >> 32),
 	    (u_int)(c->c_precision & 0xffffffff));
 	LIST_INSERT_HEAD(&cc->cc_callwheel[slot], c, c_links.le);
 	/* Inserted at the head of the bucket recorded in cc_bucket. */
 	if (slot == cc->cc_bucket)
 		cc_exec_next(cc) = c;
 #ifndef NO_EVENTTIMERS
 	/*
 	 * Tell eventtimers(4) about the new callout, but only when it
 	 * would fire before the currently programmed first event.  The
 	 * precision is clamped so that c_time + c_precision cannot
 	 * exceed SBT_MAX.
 	 */
 	if (c->c_precision > SBT_MAX - c->c_time)
 		c->c_precision = SBT_MAX - c->c_time;
 	sbt = c->c_time + c->c_precision;
 	if (sbt < cc->cc_firstevent) {
 		cc->cc_firstevent = sbt;
 		cpu_new_callout(cpu, sbt, c->c_time);
 	}
 #endif
 }
 
 /*
  * Return a callout carrying CALLOUT_LOCAL_ALLOC to the free list of 'cc'
  * after clearing its function pointer.  Other callouts are left untouched.
  */
 static void
 callout_cc_del(struct callout *c, struct callout_cpu *cc)
 {
 
 	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) != 0) {
 		c->c_func = NULL;
 		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 	}
 }
 
 /*
  * Run one callout 'c' that has been taken off the wheel or expire queue of
  * 'cc'.
  *
  * 'direct' selects which per-cc execution slot (cc_exec_curr() and friends)
  * is used.  The cc lock is dropped around the handler invocation and taken
  * again before the post-processing below (drain notification, waking a
  * waiter, deferred migration, returning timeout(9) callouts to the free
  * list).  With CALLOUT_PROFILING the three counters are bumped according to
  * how the handler was entered (no lock, Giant, other lock).
  */
 static void
 softclock_call_cc(struct callout *c, struct callout_cpu *cc,
 #ifdef CALLOUT_PROFILING
     int *mpcalls, int *lockcalls, int *gcalls,
 #endif
     int direct)
 {
 	struct rm_priotracker tracker;
 	void (*c_func)(void *);
 	void *c_arg;
 	struct lock_class *class;
 	struct lock_object *c_lock;
 	uintptr_t lock_status;
 	int c_iflags;
 #ifdef SMP
 	struct callout_cpu *new_cc;
 	void (*new_func)(void *);
 	void *new_arg;
 	int flags, new_cpu;
 	sbintime_t new_prec, new_time;
 #endif
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) 
 	sbintime_t sbt1, sbt2;
 	struct timespec ts2;
 	static sbintime_t maxdt = 2 * SBT_1MS;	/* 2 msec */
 	static timeout_t *lastfunc;
 #endif
 
 	KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
 	    ("softclock_call_cc: pend %p %x", c, c->c_iflags));
 	KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
 	    ("softclock_call_cc: act %p %x", c, c->c_flags));
 	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
 	/*
 	 * lock_status is the second argument handed to lc_lock(): 0 by
 	 * default; for CALLOUT_SHAREDLOCK it is the address of the local
 	 * tracker for rm locks and 1 for every other class.
 	 */
 	lock_status = 0;
 	if (c->c_flags & CALLOUT_SHAREDLOCK) {
 		if (class == &lock_class_rm)
 			lock_status = (uintptr_t)&tracker;
 		else
 			lock_status = 1;
 	}
 	/*
 	 * Snapshot everything needed from the callout while the cc lock is
 	 * still held; the cached copies are what is used after the unlock.
 	 */
 	c_lock = c->c_lock;
 	c_func = c->c_func;
 	c_arg = c->c_arg;
 	c_iflags = c->c_iflags;
 	if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
 		c->c_iflags = CALLOUT_LOCAL_ALLOC;
 	else
 		c->c_iflags &= ~CALLOUT_PENDING;
 	
 	/* Publish this callout as the one currently executing. */
 	cc_exec_curr(cc, direct) = c;
 	cc_exec_cancel(cc, direct) = false;
 	cc_exec_drain(cc, direct) = NULL;
 	CC_UNLOCK(cc);
 	if (c_lock != NULL) {
 		class->lc_lock(c_lock, lock_status);
 		/*
 		 * The callout may have been cancelled
 		 * while we switched locks.
 		 */
 		if (cc_exec_cancel(cc, direct)) {
 			class->lc_unlock(c_lock);
 			goto skip;
 		}
 		/* The callout cannot be stopped now. */
 		cc_exec_cancel(cc, direct) = true;
 		if (c_lock == &Giant.lock_object) {
 #ifdef CALLOUT_PROFILING
 			(*gcalls)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
 			    c, c_func, c_arg);
 		} else {
 #ifdef CALLOUT_PROFILING
 			(*lockcalls)++;
 #endif
 			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
 			    c, c_func, c_arg);
 		}
 	} else {
 #ifdef CALLOUT_PROFILING
 		(*mpcalls)++;
 #endif
 		CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
 		    c, c_func, c_arg);
 	}
 	KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running",
 	    "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct);
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	sbt1 = sbinuptime();
 #endif
 	/* The handler runs bracketed by the no-sleeping markers and probes. */
 	THREAD_NO_SLEEPING();
-	SDT_PROBE1(callout_execute, kernel, , callout__start, c);
+	SDT_PROBE1(callout_execute, , , callout__start, c);
 	c_func(c_arg);
-	SDT_PROBE1(callout_execute, kernel, , callout__end, c);
+	SDT_PROBE1(callout_execute, , , callout__end, c);
 	THREAD_SLEEPING_OK();
 #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
 	/*
 	 * Report handlers that ran longer than the longest one seen so far
 	 * (initially 2 ms).  The message is suppressed for the same function
 	 * unless the duration more than doubled.
 	 */
 	sbt2 = sbinuptime();
 	sbt2 -= sbt1;
 	if (sbt2 > maxdt) {
 		if (lastfunc != c_func || sbt2 > maxdt * 2) {
 			ts2 = sbttots(sbt2);
 			printf(
 		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
 			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
 		}
 		maxdt = sbt2;
 		lastfunc = c_func;
 	}
 #endif
 	KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
 	CTR1(KTR_CALLOUT, "callout %p finished", c);
 	/*
 	 * NOTE(review): 'class' is NULL when c_lock is NULL, so this relies
 	 * on lock-less callouts carrying CALLOUT_RETURNUNLOCKED (as set by
 	 * callout_init() for mpsafe callouts) -- confirm for other
 	 * initialisation paths.
 	 */
 	if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
 		class->lc_unlock(c_lock);
 skip:
 	CC_LOCK(cc);
 	KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
 	cc_exec_curr(cc, direct) = NULL;
 	/*
 	 * A drain callback was registered while the handler ran: call it
 	 * with the handler's argument, without the cc lock held.
 	 */
 	if (cc_exec_drain(cc, direct)) {
 		void (*drain)(void *);
 		
 		drain = cc_exec_drain(cc, direct);
 		cc_exec_drain(cc, direct) = NULL;
 		CC_UNLOCK(cc);
 		drain(c_arg);
 		CC_LOCK(cc);
 	}
 	if (cc_exec_waiting(cc, direct)) {
 		/*
 		 * There is someone waiting for the
 		 * callout to complete.
 		 * If the callout was scheduled for
 		 * migration just cancel it.
 		 */
 		if (cc_cce_migrating(cc, direct)) {
 			cc_cce_cleanup(cc, direct);
 
 			/*
 			 * It should be assert here that the callout is not
 			 * destroyed but that is not easy.
 			 */
 			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 		}
 		cc_exec_waiting(cc, direct) = false;
 		CC_UNLOCK(cc);
 		wakeup(&cc_exec_waiting(cc, direct));
 		CC_LOCK(cc);
 	} else if (cc_cce_migrating(cc, direct)) {
 		KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
 		    ("Migrating legacy callout %p", c));
 #ifdef SMP
 		/*
 		 * If the callout was scheduled for
 		 * migration just perform it now.
 		 */
 		new_cpu = cc_migration_cpu(cc, direct);
 		new_time = cc_migration_time(cc, direct);
 		new_prec = cc_migration_prec(cc, direct);
 		new_func = cc_migration_func(cc, direct);
 		new_arg = cc_migration_arg(cc, direct);
 		cc_cce_cleanup(cc, direct);
 
 		/*
 		 * It should be assert here that the callout is not destroyed
 		 * but that is not easy.
 		 *
 		 * As first thing, handle deferred callout stops.
 		 */
 		if (!callout_migrating(c)) {
 			CTR3(KTR_CALLOUT,
 			     "deferred cancelled %p func %p arg %p",
 			     c, new_func, new_arg);
 			callout_cc_del(c, cc);
 			return;
 		}
 		c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 
 		/* Re-add on the target CPU, then come back to our own lock. */
 		new_cc = callout_cpu_switch(c, cc, new_cpu);
 		flags = (direct) ? C_DIRECT_EXEC : 0;
 		callout_cc_add(c, new_cc, new_time, new_prec, new_func,
 		    new_arg, new_cpu, flags);
 		CC_UNLOCK(new_cc);
 		CC_LOCK(cc);
 #else
 		panic("migration should not happen");
 #endif
 	}
 	/*
 	 * If the current callout is locally allocated (from
 	 * timeout(9)) then put it on the freelist.
 	 *
 	 * Note: we need to check the cached copy of c_iflags because
 	 * if it was not local, then it's not safe to deref the
 	 * callout pointer.
 	 */
 	KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
 	    c->c_iflags == CALLOUT_LOCAL_ALLOC,
 	    ("corrupted callout"));
 	if (c_iflags & CALLOUT_LOCAL_ALLOC)
 		callout_cc_del(c, cc);
 }
 
 /*
  * The callout mechanism is based on the work of Adam M. Costello and
  * George Varghese, published in a technical report entitled "Redesigning
  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
  * used in this implementation was published by G. Varghese and T. Lauck in
  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
  * the Efficient Implementation of a Timer Facility" in the Proceedings of
  * the 11th ACM Annual Symposium on Operating Systems Principles,
  * Austin, Texas Nov 1987.
  */
 
 /*
  * Software (low priority) clock interrupt handler.
  *
  * 'arg' is the callout_cpu whose expire queue is to be serviced.  Entries
  * are taken from the head of cc_expireq one at a time and handed to
  * softclock_call_cc() in non-direct mode until the queue is empty.
  */
 void
 softclock(void *arg)
 {
 	struct callout_cpu *cc;
 	struct callout *expired;
 #ifdef CALLOUT_PROFILING
 	int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
 #endif
 
 	cc = arg;
 	CC_LOCK(cc);
 	for (;;) {
 		expired = TAILQ_FIRST(&cc->cc_expireq);
 		if (expired == NULL)
 			break;
 		TAILQ_REMOVE(&cc->cc_expireq, expired, c_links.tqe);
 		softclock_call_cc(expired, cc,
 #ifdef CALLOUT_PROFILING
 		    &mpcalls, &lockcalls, &gcalls,
 #endif
 		    0);
 #ifdef CALLOUT_PROFILING
 		++depth;
 #endif
 	}
 #ifdef CALLOUT_PROFILING
 	/* Fold this run's counts into the running averages. */
 	avg_depth += (depth * 1000 - avg_depth) >> 8;
 	avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
 	avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
 	avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
 #endif
 	CC_UNLOCK(cc);
 }
 
 /*
  * timeout --
  *	Execute a function after a specified length of time.
  *
  * untimeout --
  *	Cancel previous timeout function call.
  *
  * callout_handle_init --
  *	Initialize a handle so that using it with untimeout is benign.
  *
  *	See AT&T BCI Driver Reference Manual for specification.  This
  *	implementation differs from that one in that although an
  *	identification value is returned from timeout, the original
  *	arguments to timeout as well as the identifier are used to
  *	identify entries for untimeout.
  */
 struct callout_handle
 timeout(timeout_t *ftn, void *arg, int to_ticks)
 {
 	struct callout_handle handle;
 	struct callout_cpu *cc;
 	struct callout *entry;
 
 	cc = CC_CPU(timeout_cpu);
 	CC_LOCK(cc);
 	/* Take the first structure off the free list; panic if none left. */
 	entry = SLIST_FIRST(&cc->cc_callfree);
 	if (entry == NULL) {
 		/* XXX Attempt to malloc first */
 		panic("timeout table full");
 	}
 	SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
 	handle.callout = entry;
 	callout_reset(entry, to_ticks, ftn, arg);
 	CC_UNLOCK(cc);
 
 	return (handle);
 }
 
 void
 untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
 {
 	struct callout_cpu *cc;
 	struct callout *entry;
 
 	/*
 	 * A handle that went through callout_handle_init() but was never
 	 * used for a real timeout carries a NULL callout: nothing to do.
 	 */
 	entry = handle.callout;
 	if (entry == NULL)
 		return;
 
 	/* Stop it only if it still refers to the same function/argument. */
 	cc = callout_lock(entry);
 	if (entry->c_func == ftn && entry->c_arg == arg)
 		callout_stop(entry);
 	CC_UNLOCK(cc);
 }
 
 void
 callout_handle_init(struct callout_handle *handle)
 {
 
 	/* A NULL callout makes untimeout() on this handle return early. */
 	handle->callout = NULL;
 }
 
 /*
  * New interface; clients allocate their own callout structures.
  *
  * callout_reset() - establish or change a timeout
  * callout_stop() - disestablish a timeout
  * callout_init() - initialize a callout structure so that it can
  *	safely be passed to callout_reset() and callout_stop()
  *
  * <sys/callout.h> defines three convenience macros:
  *
  * callout_active() - returns truth if callout has not been stopped,
  *	drained, or deactivated since the last time the callout was
  *	reset.
  * callout_pending() - returns truth if callout is still waiting for timeout
  * callout_deactivate() - marks the callout as having been serviced
  */
 /*
  * Schedule (or reschedule) callout 'c' to run ftn(arg).
  *
  * 'sbt' is an absolute time when C_ABSOLUTE is set in 'flags', otherwise an
  * interval added to a base time chosen below.  'precision' is raised to the
  * value derived from the interval when that is larger.  'cpu' selects the
  * target CPU; -1 means "keep the callout where it is".
  *
  * Returns non-zero when a previously pending invocation was removed, when an
  * in-progress invocation was cancelled before it really started, or when a
  * deferred migration was overwritten; otherwise 0.
  */
 int
 callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
     void (*ftn)(void *), void *arg, int cpu, int flags)
 {
 	sbintime_t to_sbt, pr;
 	struct callout_cpu *cc;
 	int cancelled, direct;
 	int ignore_cpu=0;
 
 	cancelled = 0;
 	/*
 	 * NOTE(review): only cpu == -1 and cpu >= MAXCPU are screened here;
 	 * any other negative value reaches CC_CPU(cpu) -- confirm callers
 	 * never pass one.
 	 */
 	if (cpu == -1) {
 		ignore_cpu = 1;
 	} else if ((cpu >= MAXCPU) ||
 		   ((CC_CPU(cpu))->cc_inited == 0)) {
 		/* Invalid CPU spec */
 		panic("Invalid CPU in callout %d", cpu);
 	}
 	if (flags & C_ABSOLUTE) {
 		to_sbt = sbt;
 	} else {
 		/* With C_HARDCLOCK the interval is at least one tick. */
 		if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
 			sbt = tick_sbt;
 		/*
 		 * The two preprocessor arms below each supply the end of
 		 * this condition and its opening brace.
 		 */
 		if ((flags & C_HARDCLOCK) ||
 #ifdef NO_EVENTTIMERS
 		    sbt >= sbt_timethreshold) {
 			to_sbt = getsbinuptime();
 
 			/* Add safety belt for the case of hz > 1000. */
 			to_sbt += tc_tick_sbt - tick_sbt;
 #else
 		    sbt >= sbt_tickthreshold) {
 			/*
 			 * Obtain the time of the last hardclock() call on
 			 * this CPU directly from the kern_clocksource.c.
 			 * This value is per-CPU, but it is equal for all
 			 * active ones.
 			 */
 #ifdef __LP64__
 			to_sbt = DPCPU_GET(hardclocktime);
 #else
 			spinlock_enter();
 			to_sbt = DPCPU_GET(hardclocktime);
 			spinlock_exit();
 #endif
 #endif
 			if ((flags & C_HARDCLOCK) == 0)
 				to_sbt += tick_sbt;
 		} else
 			to_sbt = sbinuptime();
 		/* Saturate at SBT_MAX instead of overflowing. */
 		if (SBT_MAX - to_sbt < sbt)
 			to_sbt = SBT_MAX;
 		else
 			to_sbt += sbt;
 		/*
 		 * Derive a precision from the interval: shift by tc_precexp
 		 * when C_PRELGET(flags) is negative, else by C_PRELGET(flags).
 		 */
 		pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
 		    sbt >> C_PRELGET(flags));
 		if (pr > precision)
 			precision = pr;
 	}
 	/*
 	 * This flag used to be added by callout_cc_add, but the
 	 * first time you call this we could end up with the
 	 * wrong direct flag if we don't do it before we add.
 	 */
 	if (flags & C_DIRECT_EXEC) {
 		direct = 1;
 	} else {
 		direct = 0;
 	}
 	KASSERT(!direct || c->c_lock == NULL,
 	    ("%s: direct callout %p has lock", __func__, c));
 	cc = callout_lock(c);
 	/*
 	 * Don't allow migration of pre-allocated callouts lest they
 	 * become unbalanced or handle the case where the user does
 	 * not care.
 	 */
 	if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
 	    ignore_cpu) {
 		cpu = c->c_cpu;
 	}
 
 	if (cc_exec_curr(cc, direct) == c) {
 		/*
 		 * We're being asked to reschedule a callout which is
 		 * currently in progress.  If there is a lock then we
 		 * can cancel the callout if it has not really started.
 		 */
 		if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
 			cancelled = cc_exec_cancel(cc, direct) = true;
 		if (cc_exec_waiting(cc, direct)) {
 			/*
 			 * Someone has called callout_drain to kill this
 			 * callout.  Don't reschedule.
 			 */
 			CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
 			    cancelled ? "cancelled" : "failed to cancel",
 			    c, c->c_func, c->c_arg);
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 #ifdef SMP
 		if (callout_migrating(c)) {
 			/*
 			 * This only occurs when a second callout_reset_sbt_on
 			 * is made after a previous one moved it into
 			 * deferred migration (below). Note we do *not* change
 			 * the prev_cpu even though the previous target may
 			 * be different.
 			 */
 			cc_migration_cpu(cc, direct) = cpu;
 			cc_migration_time(cc, direct) = to_sbt;
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
 			cancelled = 1;
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 #endif
 	}
 	/*
 	 * Already pending: unlink it from the wheel bucket, or from the
 	 * expire queue if it has been marked CALLOUT_PROCESSED.
 	 */
 	if (c->c_iflags & CALLOUT_PENDING) {
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
 		} else {
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 		cancelled = 1;
 		c->c_iflags &= ~ CALLOUT_PENDING;
 		c->c_flags &= ~ CALLOUT_ACTIVE;
 	}
 
 #ifdef SMP
 	/*
 	 * If the callout must migrate try to perform it immediately.
 	 * If the callout is currently running, just defer the migration
 	 * to a more appropriate moment.
 	 */
 	if (c->c_cpu != cpu) {
 		if (cc_exec_curr(cc, direct) == c) {
 			/*
 			 * Pending will have been removed since we are
 			 * actually executing the callout on another
 			 * CPU. That callout should be waiting on the
 			 * lock the caller holds. If we set both
 			 * active/and/pending after we return and the
 			 * lock on the executing callout proceeds, it
 			 * will then see pending is true and return.
 			 * At the return from the actual callout execution
 			 * the migration will occur in softclock_call_cc
 			 * and this new callout will be placed on the
 			 * new CPU via a call to callout_cpu_switch() which
 			 * will get the lock on the right CPU followed
 			 * by a call callout_cc_add() which will add it there.
 			 * (see above in softclock_call_cc()).
 			 */
 			cc_migration_cpu(cc, direct) = cpu;
 			cc_migration_time(cc, direct) = to_sbt;
 			cc_migration_prec(cc, direct) = precision;
 			cc_migration_func(cc, direct) = ftn;
 			cc_migration_arg(cc, direct) = arg;
 			c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
 			c->c_flags |= CALLOUT_ACTIVE;
 			CTR6(KTR_CALLOUT,
 		    "migration of %p func %p arg %p in %d.%08x to %u deferred",
 			    c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 			    (u_int)(to_sbt & 0xffffffff), cpu);
 			CC_UNLOCK(cc);
 			return (cancelled);
 		}
 		/* Not running: move it to the target CPU right away. */
 		cc = callout_cpu_switch(c, cc, cpu);
 	}
 #endif
 
 	callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
 	CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
 	    (u_int)(to_sbt & 0xffffffff));
 	CC_UNLOCK(cc);
 
 	return (cancelled);
 }
 
 /*
  * Common idioms that can be optimized in the future.
  */
 
 /*
  * Re-arm 'c' on 'cpu' after 'to_ticks' ticks, reusing the function and
  * argument already stored in the callout.
  */
 int
 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 {
 
 	return (callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu));
 }
 
 /*
  * Re-arm 'c' after 'to_ticks' ticks with its stored function, argument
  * and CPU.
  */
 int
 callout_schedule(struct callout *c, int to_ticks)
 {
 
 	return (callout_reset_on(c, to_ticks, c->c_func, c->c_arg,
 	    c->c_cpu));
 }
 
 /*
  * Stop callout 'c'.
  *
  * With 'safe' non-zero the function waits for an invocation that is
  * currently in progress to finish.  A non-NULL 'drain' is recorded in
  * cc_exec_drain() when the callout is running and cannot be stopped here.
  *
  * Returns 1 when the callout was removed or cancelled before its handler
  * started, 0 when it is (or was) executing and could not be stopped, and
  * -1 when it was neither pending nor the currently executing callout.
  */
 int
 _callout_stop_safe(struct callout *c, int safe, void (*drain)(void *))
 {
 	struct callout_cpu *cc, *old_cc;
 	struct lock_class *class;
 	int direct, sq_locked, use_lock;
 	int not_on_a_list;
 
 	if (safe)
 		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
 		    "calling %s", __func__);
 
 	/*
 	 * Some old subsystems don't hold Giant while running a callout_stop(),
 	 * so just discard this check for the moment.
 	 */
 	if (!safe && c->c_lock != NULL) {
 		if (c->c_lock == &Giant.lock_object)
 			use_lock = mtx_owned(&Giant);
 		else {
 			use_lock = 1;
 			class = LOCK_CLASS(c->c_lock);
 			class->lc_assert(c->c_lock, LA_XLOCKED);
 		}
 	} else
 		use_lock = 0;
 	if (c->c_iflags & CALLOUT_DIRECT) {
 		direct = 1;
 	} else {
 		direct = 0;
 	}
 	sq_locked = 0;
 	old_cc = NULL;
 	/*
 	 * Restart point: re-entered after taking the sleepqueue chain lock
 	 * (which requires dropping the cc lock) and after a detected
 	 * migration.
 	 */
 again:
 	cc = callout_lock(c);
 
 	if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
 	    (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
 	    ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
 		/*
 		 * Special case where this slipped in while we
 		 * were migrating *as* the callout is about to
 		 * execute. The caller probably holds the lock
 		 * the callout wants.
 		 *
 		 * Get rid of the migration first. Then set
 		 * the flag that tells this code *not* to
 		 * try to remove it from any lists (it's not
 		 * on one yet). When the callout wheel runs,
 		 * it will ignore this callout.
 		 */
 		c->c_iflags &= ~CALLOUT_PENDING;
 		c->c_flags &= ~CALLOUT_ACTIVE;
 		not_on_a_list = 1;
 	} else {
 		not_on_a_list = 0;
 	}
 
 	/*
 	 * If the callout was migrating while the callout cpu lock was
 	 * dropped,  just drop the sleepqueue lock and check the states
 	 * again.
 	 */
 	if (sq_locked != 0 && cc != old_cc) {
 #ifdef SMP
 		CC_UNLOCK(cc);
 		sleepq_release(&cc_exec_waiting(old_cc, direct));
 		sq_locked = 0;
 		old_cc = NULL;
 		goto again;
 #else
 		panic("migration should not happen");
 #endif
 	}
 
 	/*
 	 * If the callout isn't pending, it's not on the queue, so
 	 * don't attempt to remove it from the queue.  We can try to
 	 * stop it by other means however.
 	 */
 	if (!(c->c_iflags & CALLOUT_PENDING)) {
 		/*
 		 * If it wasn't on the queue and it isn't the current
 		 * callout, then we can't stop it, so just bail.
 		 * It probably has already been run (if locking
 		 * is properly done). You could get here if the caller
 		 * calls stop twice in a row for example. The second
 		 * call would fall here without CALLOUT_ACTIVE set.
 		 */
 		c->c_flags &= ~CALLOUT_ACTIVE;
 		if (cc_exec_curr(cc, direct) != c) {
 			CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
 			CC_UNLOCK(cc);
 			if (sq_locked)
 				sleepq_release(&cc_exec_waiting(cc, direct));
 			return (-1);
 		}
 
 		if (safe) {
 			/*
 			 * The current callout is running (or just
 			 * about to run) and blocking is allowed, so
 			 * just wait for the current invocation to
 			 * finish.
 			 */
 			while (cc_exec_curr(cc, direct) == c) {
 				/*
 				 * Use direct calls to sleepqueue interface
 				 * instead of cv/msleep in order to avoid
 				 * a LOR between cc_lock and sleepqueue
 				 * chain spinlocks.  This piece of code
 				 * emulates a msleep_spin() call actually.
 				 *
 				 * If we already have the sleepqueue chain
 				 * locked, then we can safely block.  If we
 				 * don't already have it locked, however,
 				 * we have to drop the cc_lock to lock
 				 * it.  This opens several races, so we
 				 * restart at the beginning once we have
 				 * both locks.  If nothing has changed, then
 				 * we will end up back here with sq_locked
 				 * set.
 				 */
 				if (!sq_locked) {
 					CC_UNLOCK(cc);
 					sleepq_lock(
 					    &cc_exec_waiting(cc, direct));
 					sq_locked = 1;
 					old_cc = cc;
 					goto again;
 				}
 
 				/*
 				 * Migration could be cancelled here, but
 				 * as long as it is still not sure when it
 				 * will be packed up, just let softclock()
 				 * take care of it.
 				 */
 				cc_exec_waiting(cc, direct) = true;
 				DROP_GIANT();
 				CC_UNLOCK(cc);
 				sleepq_add(
 				    &cc_exec_waiting(cc, direct),
 				    &cc->cc_lock.lock_object, "codrain",
 				    SLEEPQ_SLEEP, 0);
 				sleepq_wait(
 				    &cc_exec_waiting(cc, direct),
 					     0);
 				sq_locked = 0;
 				old_cc = NULL;
 
 				/* Reacquire locks previously released. */
 				PICKUP_GIANT();
 				CC_LOCK(cc);
 			}
 			/*
 			 * The wait is over; control continues at the
 			 * "failed to stop" exit below, which returns 0.
 			 */
 		} else if (use_lock &&
 			   !cc_exec_cancel(cc, direct) && (drain == NULL)) {
 			
 			/*
 			 * The current callout is waiting for its
 			 * lock which we hold.  Cancel the callout
 			 * and return.  After our caller drops the
 			 * lock, the callout will be skipped in
 			 * softclock(). This *only* works with a
 			 * callout_stop() *not* callout_drain() or
 			 * callout_async_drain().
 			 */
 			cc_exec_cancel(cc, direct) = true;
 			CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
 			KASSERT(!cc_cce_migrating(cc, direct),
 			    ("callout wrongly scheduled for migration"));
 			if (callout_migrating(c)) {
 				c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 #ifdef SMP
 				cc_migration_cpu(cc, direct) = CPUBLOCK;
 				cc_migration_time(cc, direct) = 0;
 				cc_migration_prec(cc, direct) = 0;
 				cc_migration_func(cc, direct) = NULL;
 				cc_migration_arg(cc, direct) = NULL;
 #endif
 			}
 			CC_UNLOCK(cc);
 			KASSERT(!sq_locked, ("sleepqueue chain locked"));
 			return (1);
 		} else if (callout_migrating(c)) {
 			/*
 			 * The callout is currently being serviced
 			 * and the "next" callout is scheduled at
 			 * its completion with a migration. We remove
 			 * the migration flag so it *won't* get rescheduled,
 			 * but we can't stop the one that's running so
 			 * we return 0.
 			 */
 			c->c_iflags &= ~CALLOUT_DFRMIGRATION;
 #ifdef SMP
 			/*
 			 * We can't call cc_cce_cleanup here since
 			 * if we do it will remove .ce_curr and
 			 * it's still running. This will prevent a
 			 * reschedule of the callout when the
 			 * execution completes.
 			 */
 			cc_migration_cpu(cc, direct) = CPUBLOCK;
 			cc_migration_time(cc, direct) = 0;
 			cc_migration_prec(cc, direct) = 0;
 			cc_migration_func(cc, direct) = NULL;
 			cc_migration_arg(cc, direct) = NULL;
 #endif
 			CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
 			    c, c->c_func, c->c_arg);
  			if (drain) {
 				cc_exec_drain(cc, direct) = drain;
 			}
 			CC_UNLOCK(cc);
 			return (0);
 		}
 		CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 		    c, c->c_func, c->c_arg);
 		if (drain) {
 			cc_exec_drain(cc, direct) = drain;
 		}
 		CC_UNLOCK(cc);
 		KASSERT(!sq_locked, ("sleepqueue chain still locked"));
 		return (0);
 	}
 	if (sq_locked)
 		sleepq_release(&cc_exec_waiting(cc, direct));
 
 	c->c_iflags &= ~CALLOUT_PENDING;
 	c->c_flags &= ~CALLOUT_ACTIVE;
 
 	CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
 	    c, c->c_func, c->c_arg);
 	/*
 	 * Unlink from the wheel bucket, or from the expire queue when the
 	 * callout was already marked CALLOUT_PROCESSED.  Skipped for the
 	 * deferred-migration case detected above.
 	 */
 	if (not_on_a_list == 0) {
 		if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
 			if (cc_exec_next(cc) == c)
 				cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
 			LIST_REMOVE(c, c_links.le);
 		} else {
 			TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
 		}
 	}
 	callout_cc_del(c, cc);
 	CC_UNLOCK(cc);
 	return (1);
 }
 
 /*
  * Initialise callout 'c'.  An mpsafe callout gets no lock and the
  * CALLOUT_RETURNUNLOCKED flag; otherwise the callout is tied to Giant with
  * no internal flags.  The callout is assigned to timeout_cpu.
  */
 void
 callout_init(struct callout *c, int mpsafe)
 {
 
 	bzero(c, sizeof(*c));
 	c->c_lock = (mpsafe != 0) ? NULL : &Giant.lock_object;
 	c->c_iflags = (mpsafe != 0) ? CALLOUT_RETURNUNLOCKED : 0;
 	c->c_cpu = timeout_cpu;
 }
 
 /*
  * Initialise callout 'c' so that its handler runs with 'lock' held.
  * Only CALLOUT_RETURNUNLOCKED and CALLOUT_SHAREDLOCK are accepted in
  * 'flags'; spin and sleepable lock classes are rejected by assertion.
  */
 void
 _callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
 {
 	const int allowed = CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK;
 
 	bzero(c, sizeof(*c));
 	c->c_lock = lock;
 	KASSERT((flags & ~allowed) == 0,
 	    ("callout_init_lock: bad flags %d", flags));
 	KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
 	    ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
 	KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
 	    (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
 	    __func__));
 	c->c_iflags = flags & allowed;
 	c->c_cpu = timeout_cpu;
 }
 
 #ifdef APM_FIXUP_CALLTODO
 /*
  * Adjust the kernel calltodo timeout list.  This routine is used after
  * an APM resume to recalculate the calltodo timer list values with the
  * number of hz's we have been sleeping.  The next hardclock() will detect
  * that there are fired timers and run softclock() to execute them.
  *
  * Please note, I have not done an exhaustive analysis of what code this
  * might break.  I am motivated to have my select()'s and alarm()'s that
  * have expired during suspend firing upon resume so that the applications
  * which set the timer can do the maintenance the timer was for as close
  * as possible to the originally intended time.  Testing this code for a
  * week showed that resuming from a suspend resulted in 22 to 25 timers
  * firing, which seemed independent of whether the suspend was 2 hours or
  * 2 days.  Your mileage may vary.   - Ken Key <key@cs.utk.edu>
  *
  * NOTE(review): the body refers to 'cc' and 'calltodo', neither of which
  * is declared inside this function -- confirm both exist at file scope
  * before enabling APM_FIXUP_CALLTODO.
  */
 void
 adjust_timeout_calltodo(struct timeval *time_change)
 {
 	register struct callout *p;
 	unsigned long delta_ticks;
 
 	/*
 	 * How many ticks were we asleep?
 	 * (stolen from tvtohz()).
 	 */
 
 	/* Don't do anything */
 	if (time_change->tv_sec < 0)
 		return;
 	else if (time_change->tv_sec <= LONG_MAX / 1000000)
 		/* Small enough to convert via microseconds. */
 		delta_ticks = (time_change->tv_sec * 1000000 +
 			       time_change->tv_usec + (tick - 1)) / tick + 1;
 	else if (time_change->tv_sec <= LONG_MAX / hz)
 		/* Convert seconds and microseconds separately. */
 		delta_ticks = time_change->tv_sec * hz +
 			      (time_change->tv_usec + (tick - 1)) / tick + 1;
 	else
 		delta_ticks = LONG_MAX;
 
 	if (delta_ticks > INT_MAX)
 		delta_ticks = INT_MAX;
 
 	/*
 	 * Now rip through the timer calltodo list looking for timers
 	 * to expire.
 	 */
 
 	/* don't collide with softclock() */
 	CC_LOCK(cc);
 	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
 		p->c_time -= delta_ticks;
 
 		/* Break if the timer had more time on it than delta_ticks */
 		if (p->c_time > 0)
 			break;
 
 		/* take back the ticks the timer didn't use (p->c_time <= 0) */
 		delta_ticks = -p->c_time;
 	}
 	CC_UNLOCK(cc);
 
 	return;
 }
 #endif /* APM_FIXUP_CALLTODO */
 
 /*
  * Return the position of the highest set bit of 'sbt' after adding half of
  * its value to it.  When long is narrower than sbintime_t and the value is
  * at least SBT_1S, only the upper 32 bits are examined and 32 is added to
  * the result.
  */
 static int
 flssbt(sbintime_t sbt)
 {
 
 	sbt += (uint64_t)sbt >> 1;
 	if (sizeof(long) < sizeof(sbintime_t) && sbt >= SBT_1S)
 		return (flsl(((uint64_t)sbt) >> 32) + 32);
 	return (flsl(sbt));
 }
 
 /*
  * Dump immediate statistic snapshot of the scheduled callouts.
  *
  * Writing any integer to the sysctl walks every callwheel bucket (of every
  * CPU on SMP) under the respective cc lock and prints counts, median /
  * average / maximum values and log2 histograms of the time-to-fire and
  * precision of all callouts found.  A read-only access, or an error from
  * sysctl_handle_int(), returns without printing.
  *
  * Histogram indices are clamped to the last bin: flssbt() can return 64
  * for values with bit 63 set after its rounding (e.g. callouts scheduled at
  * SBT_MAX), which would otherwise index one past the end of ct[]/cpr[].
  * Averages are reported as 0 when no callout is scheduled instead of
  * dividing by zero.
  */
 static int
 sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
 {
 	struct callout *tmp;
 	struct callout_cpu *cc;
 	struct callout_list *sc;
 	sbintime_t avgpr, avgt, maxpr, maxt, medpr, medt, now, spr, st, t;
 	int ct[64], cpr[64], ccpbk[32];
 	int error, val, i, count, tcum, pcum, maxc, c, medc;
 	int bin;
 #ifdef SMP
 	int cpu;
 #endif
 
 	val = 0;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	count = maxc = 0;
 	st = spr = maxt = maxpr = 0;
 	bzero(ccpbk, sizeof(ccpbk));
 	bzero(ct, sizeof(ct));
 	bzero(cpr, sizeof(cpr));
 	now = sbinuptime();
 	/* On SMP the block below is the body of the CPU_FOREACH loop. */
 #ifdef SMP
 	CPU_FOREACH(cpu) {
 		cc = CC_CPU(cpu);
 #else
 		cc = CC_CPU(timeout_cpu);
 #endif
 		CC_LOCK(cc);
 		for (i = 0; i < callwheelsize; i++) {
 			sc = &cc->cc_callwheel[i];
 			c = 0;
 			LIST_FOREACH(tmp, sc, c_links.le) {
 				c++;
 				/* Already-due callouts count as "now". */
 				t = tmp->c_time - now;
 				if (t < 0)
 					t = 0;
 				st += t / SBT_1US;
 				spr += tmp->c_precision / SBT_1US;
 				if (t > maxt)
 					maxt = t;
 				if (tmp->c_precision > maxpr)
 					maxpr = tmp->c_precision;
 				bin = flssbt(t);
 				if (bin >= (int)(sizeof(ct) / sizeof(ct[0])))
 					bin = sizeof(ct) / sizeof(ct[0]) - 1;
 				ct[bin]++;
 				bin = flssbt(tmp->c_precision);
 				if (bin >= (int)(sizeof(cpr) / sizeof(cpr[0])))
 					bin = sizeof(cpr) / sizeof(cpr[0]) - 1;
 				cpr[bin]++;
 			}
 			if (c > maxc)
 				maxc = c;
 			bin = fls(c + c / 2);
 			if (bin >= (int)(sizeof(ccpbk) / sizeof(ccpbk[0])))
 				bin = sizeof(ccpbk) / sizeof(ccpbk[0]) - 1;
 			ccpbk[bin]++;
 			count += c;
 		}
 		CC_UNLOCK(cc);
 #ifdef SMP
 	}
 #endif
 
 	/* Medians: first bin at which the cumulative count reaches half. */
 	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
 		tcum += ct[i];
 	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
 		pcum += cpr[i];
 	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
 	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
 		c += ccpbk[i];
 	medc = (i >= 2) ? (1 << (i - 2)) : 0;
 
 	/* Averages in microseconds; guard against an empty wheel. */
 	avgt = (count != 0) ? st / count : 0;
 	avgpr = (count != 0) ? spr / count : 0;
 
 	printf("Scheduled callouts statistic snapshot:\n");
 	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
 	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
 	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
 	    medc,
 	    count / callwheelsize / mp_ncpus,
 	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
 	    maxc);
 	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
 	    avgt / 1000000, avgt % 1000000,
 	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
 	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
 	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
 	    avgpr / 1000000, avgpr % 1000000,
 	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
 	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
 	    "   prec\t   pcum\n");
 	for (i = 0, tcum = pcum = 0; i < 64; i++) {
 		if (ct[i] == 0 && cpr[i] == 0)
 			continue;
 		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
 		tcum += ct[i];
 		pcum += cpr[i];
 		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
 		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
 		    i - 1 - (32 - CC_HASH_SHIFT),
 		    ct[i], tcum, cpr[i], pcum);
 	}
 	return (error);
 }
 /*
  * Register the handler as the read/write integer sysctl "callout_stat"
  * under the _kern node; writing to it triggers the snapshot dump.
  */
 SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
     0, 0, sysctl_kern_callout_stat, "I",
     "Dump immediate statistic snapshot of the scheduled callouts");
Index: head/sys/kern/vfs_cache.c
===================================================================
--- head/sys/kern/vfs_cache.c	(revision 292383)
+++ head/sys/kern/vfs_cache.c	(revision 292384)
@@ -1,1520 +1,1518 @@
 /*-
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Poul-Henning Kamp of the FreeBSD Project.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/uma.h>
 
 SDT_PROVIDER_DECLARE(vfs);
 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
     "char *", "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int",
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative,
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
     "char *");
 
 /*
  * This structure describes the elements in the cache of recent
  * names looked up by namei.
  */
 
 /*
  * NOTE: the members up to and including nc_nlen must keep the same order
  * and types as in struct namecache_ts.  Entries flagged NCF_TS are really
  * struct namecache_ts and are reached through casts of this type (see
  * nc_get_name() and cache_out_ts()).  A NULL nc_vp marks a negative entry.
  */
 struct	namecache {
 	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	struct	vnode *nc_vp;		/* vnode the name refers to */
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
 	char	nc_name[0];		/* segment name + nul */
 };
 
 /*
  * struct namecache_ts repeats struct namecache layout up to the
  * nc_nlen member.
  * struct namecache_ts is used in place of struct namecache when time(s) need
  * to be stored.  The nc_dotdottime field is used when a cache entry is mapping
  * both a non-dotdot directory name plus dotdot for the directory's
  * parent.
  */
 /*
  * Because the timestamp members sit between nc_nlen and nc_name, the name
  * lives at a different offset than in struct namecache; always fetch it
  * with nc_get_name(), which checks NCF_TS before choosing the layout.
  */
 struct	namecache_ts {
 	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	struct	vnode *nc_vp;		/* vnode the name refers to */
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
 	struct	timespec nc_time;	/* timespec provided by fs */
 	struct	timespec nc_dotdottime;	/* dotdot timespec provided by fs */
 	int	nc_ticks;		/* ticks value when entry was added */
 	char	nc_name[0];		/* segment name + nul */
 };
 
 /*
  * Flags in namecache.nc_flag
  */
 #define NCF_WHITE	0x01
 #define NCF_ISDOTDOT	0x02
 #define	NCF_TS		0x04
 #define	NCF_DTS		0x08
 
 /*
  * Name caching works as follows:
  *
  * Names found by directory scans are retained in a cache
  * for future reference.  It is managed LRU, so frequently
  * used names will hang around.  Cache is indexed by hash value
  * obtained from (vp, name) where vp refers to the directory
  * containing name.
  *
  * If it is a "negative" entry, (i.e. for a name that is known NOT to
  * exist) the vnode pointer will be NULL.
  *
  * Upon reaching the last segment of a path, if the reference
  * is for DELETE, or NOCACHE is set (rewrite), and the
  * name is located in the cache, it will be dropped.
  */
 
 /*
  * Structures associated with name caching.
  */
 #define NCHHASH(hash) \
 	(&nchashtbl[(hash) & nchash])
 static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
 static TAILQ_HEAD(, namecache) ncneg;	/* LRU queue of negative entries */
 static u_long	nchash;			/* hash mask (max index, not count) */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
 /* The oldest negative entry is evicted once numneg * ncnegfactor > numcache. */
 static u_long	ncnegfactor = 16;	/* ratio of negative entries */
 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
     "Ratio of negative namecache entries");
 static u_long	numneg;			/* number of negative entries allocated */
 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
     "Number of negative entries in namecache");
 static u_long	numcache;		/* number of cache entries allocated */
 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
     "Number of namecache entries");
 static u_long	numcachehv;		/* number of cache entries with vnodes held */
 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
     "Number of namecache entries with vnodes held");
 /* New entries are refused once numcache >= desiredvnodes * ncsizefactor. */
 static u_int	ncsizefactor = 2;
 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
     "Size factor for namecache");
 
 struct nchstats	nchstats;		/* cache effectiveness statistics */
 
 static struct rwlock cache_lock;
 RW_SYSINIT(vfscache, &cache_lock, "Name Cache");
 
 #define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
 #define	CACHE_RLOCK()		rw_rlock(&cache_lock)
 #define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
 #define	CACHE_WLOCK()		rw_wlock(&cache_lock)
 #define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)
 
 /*
  * UMA zones for the VFS cache.
  *
  * The small cache is used for entries with short names, which are the
  * most common.  The large cache is used for entries which are too big to
  * fit in the small cache.
  */
 static uma_zone_t cache_zone_small;
 static uma_zone_t cache_zone_small_ts;
 static uma_zone_t cache_zone_large;
 static uma_zone_t cache_zone_large_ts;
 
 #define	CACHE_PATH_CUTOFF	35
 
 static struct namecache *
 cache_alloc(int len, int ts)
 {
 
 	if (len > CACHE_PATH_CUTOFF) {
 		if (ts)
 			return (uma_zalloc(cache_zone_large_ts, M_WAITOK));
 		else
 			return (uma_zalloc(cache_zone_large, M_WAITOK));
 	}
 	if (ts)
 		return (uma_zalloc(cache_zone_small_ts, M_WAITOK));
 	else
 		return (uma_zalloc(cache_zone_small, M_WAITOK));
 }
 
 static void
 cache_free(struct namecache *ncp)
 {
 	int ts;
 
 	if (ncp == NULL)
 		return;
 	ts = ncp->nc_flag & NCF_TS;
 	if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) {
 		if (ts)
 			uma_zfree(cache_zone_small_ts, ncp);
 		else
 			uma_zfree(cache_zone_small, ncp);
 	} else if (ts)
 		uma_zfree(cache_zone_large_ts, ncp);
 	else
 		uma_zfree(cache_zone_large, ncp);
 }
 
 /*
  * Return a pointer to the name stored in a cache entry, using the
  * struct namecache_ts layout when the entry carries NCF_TS.
  */
 static char *
 nc_get_name(struct namecache *ncp)
 {
 
 	if ((ncp->nc_flag & NCF_TS) != 0)
 		return (((struct namecache_ts *)ncp)->nc_name);
 	return (ncp->nc_name);
 }
 
 /*
  * Copy the timestamp and/or ticks value recorded in a cache entry to the
  * caller.  Either output pointer may be NULL, in which case that value is
  * not copied.  Asking for a value from an entry without NCF_TS trips the
  * assertion.
  */
 static void
 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
 {
 	struct namecache_ts *ncp_ts;
 
 	KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
 	    (tsp == NULL && ticksp == NULL),
 	    ("No NCF_TS"));
 
 	ncp_ts = (struct namecache_ts *)ncp;
 	if (tsp != NULL)
 		*tsp = ncp_ts->nc_time;
 	if (ticksp != NULL)
 		*ticksp = ncp_ts->nc_ticks;
 }
 
 static int	doingcache = 1;		/* 1 => enable the cache */
 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
     "VFS namecache enabled");
 
 /* Export size information to userland */
 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
     sizeof(struct namecache), "sizeof(struct namecache)");
 
 /*
  * The new name cache statistics
  */
 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0,
     "Name cache statistics");
 #define STATNODE(mode, name, var, descr) \
 	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr);
 STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries");
 STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries");
 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls,
     "Number of cache lookups");
 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits,
     "Number of '.' hits");
 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits,
     "Number of '..' hits");
 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks,
     "Number of checks in lookup");
 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss,
     "Number of cache misses");
 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap,
     "Number of cache misses we do not want to cache");
 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, 
     "Number of cache hits (positive) we do not want to cache");
 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits,
     "Number of cache hits (positive)");
 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps,
     "Number of cache hits (negative) we do not want to cache");
 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits,
     "Number of cache hits (negative)");
 static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades,
     "Number of updates of the cache after lookup (write lock + retry)");
 
 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
     &nchstats, sizeof(nchstats), "LU",
     "VFS cache effectiveness statistics");
 
 
 
 static void cache_zap(struct namecache *ncp);
 static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
     u_int *buflen);
 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen);
 
 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
 
 #ifdef DIAGNOSTIC
 /*
  * Grab an atomic snapshot of the name cache hash chain lengths
  */
 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL,
     "hash table stats");
 
 /*
  * Export the length of every name cache hash chain as an array of ints,
  * one element per bucket.  When no output buffer is supplied only the
  * required size is reported.
  */
 static int
 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 {
 	struct namecache *ncp;
 	int i, error, n_nchash, *cntbuf;
 
 	/*
 	 * The buffer is allocated before the cache lock is taken, so start
 	 * over if the table size changed in between.
 	 */
 	for (;;) {
 		n_nchash = nchash + 1;	/* nchash is max index, not count */
 		if (req->oldptr == NULL)
 			return (SYSCTL_OUT(req, 0, n_nchash * sizeof(int)));
 		cntbuf = malloc(n_nchash * sizeof(int), M_TEMP,
 		    M_ZERO | M_WAITOK);
 		CACHE_RLOCK();
 		if (n_nchash == nchash + 1)
 			break;
 		CACHE_RUNLOCK();
 		free(cntbuf, M_TEMP);
 	}
 	/* Scan hash tables counting entries */
 	for (i = 0; i < n_nchash; i++) {
 		LIST_FOREACH(ncp, &nchashtbl[i], nc_hash)
 			cntbuf[i]++;
 	}
 	CACHE_RUNLOCK();
 	/* Copy the counts out one at a time, stopping at the first error. */
 	error = 0;
 	for (i = 0; i < n_nchash && error == 0; i++)
 		error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int));
 	free(cntbuf, M_TEMP);
 	return (error);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
     "nchash chain lengths");
 
 static int
 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int n_nchash;
 	int count, maxlength, used, pct;
 
 	if (!req->oldptr)
 		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
 
 	CACHE_RLOCK();
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	used = 0;
 	maxlength = 0;
 
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
 		LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
 		if (count)
 			used++;
 		if (maxlength < count)
 			maxlength = count;
 	}
 	n_nchash = nchash + 1;
 	CACHE_RUNLOCK();
 	pct = (used * 100) / (n_nchash / 100);
 	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &used, sizeof(used));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &pct, sizeof(pct));
 	if (error)
 		return (error);
 	return (0);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
     "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)");
 #endif
 
 /*
  * cache_zap():
  *
  *   Removes a namecache entry from cache, whether it contains an actual
  *   pointer to a vnode or if it is just a negative cache entry.
  */
 static void
 cache_zap(ncp)
 	struct namecache *ncp;
 {
 	struct vnode *vp;
 
 	/* The caller must hold the cache lock exclusively. */
 	rw_assert(&cache_lock, RA_WLOCKED);
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
-#ifdef KDTRACE_HOOKS
 	if (ncp->nc_vp != NULL) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    nc_get_name(ncp), ncp->nc_vp);
 	} else {
 		SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
 		    nc_get_name(ncp));
 	}
-#endif
 	/* Set below to the parent vnode if it loses its last source entry. */
 	vp = NULL;
 	LIST_REMOVE(ncp, nc_hash);
 	if (ncp->nc_flag & NCF_ISDOTDOT) {
 		/*
 		 * ".." entries are not linked on the parent's v_cache_src
 		 * list; they are only referenced through v_cache_dd.
 		 */
 		if (ncp == ncp->nc_dvp->v_cache_dd)
 			ncp->nc_dvp->v_cache_dd = NULL;
 	} else {
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			vp = ncp->nc_dvp;
 			numcachehv--;
 		}
 	}
 	if (ncp->nc_vp) {
 		/* Positive entry: unlink from the target vnode. */
 		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
 		if (ncp == ncp->nc_vp->v_cache_dd)
 			ncp->nc_vp->v_cache_dd = NULL;
 	} else {
 		/* Negative entry: unlink from the negative entries queue. */
 		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 		numneg--;
 	}
 	numcache--;
 	cache_free(ncp);
 	/*
 	 * Release the hold cache_enter_time() took on the parent when it
 	 * gained its first source entry.
 	 */
 	if (vp)
 		vdrop(vp);
 }
 
 /*
  * Lookup an entry in the cache
  *
  * Lookup is called with dvp pointing to the directory to search,
  * cnp pointing to the name of the entry being sought. If the lookup
  * succeeds, the vnode is returned in *vpp, and a status of -1 is
  * returned. If the lookup determines that the name does not exist
  * (negative caching), a status of ENOENT is returned. If the lookup
  * fails, a status of zero is returned.  If the directory vnode is
  * recycled out from under us due to a forced unmount, a status of
  * ENOENT is returned.
  *
  * vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
  * unlocked.  If we're looking up . an extra ref is taken, but the lock is
  * not recursively acquired.
  */
 
 int
 cache_lookup(dvp, vpp, cnp, tsp, ticksp)
 	struct vnode *dvp;
 	struct vnode **vpp;
 	struct componentname *cnp;
 	struct timespec *tsp;
 	int *ticksp;
 {
 	struct namecache *ncp;
 	uint32_t hash;
 	int error, ltype, wlocked;
 
 	/* Cache disabled: clear MAKEENTRY and report a miss. */
 	if (!doingcache) {
 		cnp->cn_flags &= ~MAKEENTRY;
 		return (0);
 	}
 	/* Restarted from scratch when vget() on the found vnode fails. */
 retry:
 	CACHE_RLOCK();
 	wlocked = 0;
 	numcalls++;
 	error = 0;
 
 	/* Re-entered from "wlock" below with the write lock held. */
 retry_wlocked:
 	if (cnp->cn_nameptr[0] == '.') {
 		if (cnp->cn_namelen == 1) {
 			*vpp = dvp;
 			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
 			    dvp, cnp->cn_nameptr);
 			dothits++;
 			SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ".", *vpp);
 			if (tsp != NULL)
 				timespecclear(tsp);
 			if (ticksp != NULL)
 				*ticksp = ticks;
 			goto success;
 		}
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			dotdothits++;
 			if (dvp->v_cache_dd == NULL) {
 				SDT_PROBE3(vfs, namecache, lookup, miss, dvp,
 				    "..", NULL);
 				goto unlock;
 			}
 			if ((cnp->cn_flags & MAKEENTRY) == 0) {
 				if (!wlocked && !CACHE_UPGRADE_LOCK())
 					goto wlock;
 				if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
 					cache_zap(dvp->v_cache_dd);
 				dvp->v_cache_dd = NULL;
 				CACHE_WUNLOCK();
 				return (0);
 			}
 			/*
 			 * v_cache_dd is either a dedicated ".." entry, whose
 			 * nc_vp is the parent, or the regular entry naming
 			 * dvp itself, whose nc_dvp is the parent.
 			 */
 			ncp = dvp->v_cache_dd;
 			if (ncp->nc_flag & NCF_ISDOTDOT)
 				*vpp = ncp->nc_vp;
 			else
 				*vpp = ncp->nc_dvp;
 			/* Return failure if negative entry was found. */
 			if (*vpp == NULL)
 				goto negative_success;
 			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
 			    dvp, cnp->cn_nameptr, *vpp);
 			SDT_PROBE3(vfs, namecache, lookup, hit, dvp, "..",
 			    *vpp);
 			cache_out_ts(ncp, tsp, ticksp);
 			/*
 			 * A regular entry doubling as ".." keeps the dotdot
 			 * timestamp separately; prefer it when present.
 			 */
 			if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
 			    NCF_DTS && tsp != NULL)
 				*tsp = ((struct namecache_ts *)ncp)->
 				    nc_dotdottime;
 			goto success;
 		}
 	}
 
 	/* Hash the name and the directory vnode pointer to pick a chain. */
 	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		numchecks++;
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	/* We failed to find an entry */
 	if (ncp == NULL) {
 		SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
 		    NULL);
 		if ((cnp->cn_flags & MAKEENTRY) == 0) {
 			nummisszap++;
 		} else {
 			nummiss++;
 		}
 		nchstats.ncs_miss++;
 		goto unlock;
 	}
 
 	/* We don't want to have an entry, so dump it */
 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
 		numposzaps++;
 		nchstats.ncs_badhits++;
 		if (!wlocked && !CACHE_UPGRADE_LOCK())
 			goto wlock;
 		cache_zap(ncp);
 		CACHE_WUNLOCK();
 		return (0);
 	}
 
 	/* We found a "positive" match, return the vnode */
 	if (ncp->nc_vp) {
 		numposhits++;
 		nchstats.ncs_goodhits++;
 		*vpp = ncp->nc_vp;
 		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
 		    dvp, cnp->cn_nameptr, *vpp, ncp);
 		SDT_PROBE3(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp),
 		    *vpp);
 		cache_out_ts(ncp, tsp, ticksp);
 		goto success;
 	}
 
 negative_success:
 	/* We found a negative match, and want to create it, so purge */
 	if (cnp->cn_nameiop == CREATE) {
 		numnegzaps++;
 		nchstats.ncs_badhits++;
 		if (!wlocked && !CACHE_UPGRADE_LOCK())
 			goto wlock;
 		cache_zap(ncp);
 		CACHE_WUNLOCK();
 		return (0);
 	}
 
 	/* Requeueing the entry below modifies ncneg, so write-lock first. */
 	if (!wlocked && !CACHE_UPGRADE_LOCK())
 		goto wlock;
 	numneghits++;
 	/*
 	 * We found a "negative" match, so we shift it to the end of
 	 * the "negative" cache entries queue to satisfy LRU.  Also,
 	 * check to see if the entry is a whiteout; indicate this to
 	 * the componentname, if so.
 	 */
 	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 	nchstats.ncs_neghits++;
 	if (ncp->nc_flag & NCF_WHITE)
 		cnp->cn_flags |= ISWHITEOUT;
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 	    nc_get_name(ncp));
 	cache_out_ts(ncp, tsp, ticksp);
 	CACHE_WUNLOCK();
 	return (ENOENT);
 
 wlock:
 	/*
 	 * We need to update the cache after our lookup, so upgrade to
 	 * a write lock and retry the operation.
 	 */
 	CACHE_RUNLOCK();
 	CACHE_WLOCK();
 	numupgrades++;
 	wlocked = 1;
 	goto retry_wlocked;
 
 success:
 	/*
 	 * On success we return a locked and ref'd vnode as per the lookup
 	 * protocol.
 	 */
 	if (dvp == *vpp) {   /* lookup on "." */
 		VREF(*vpp);
 		if (wlocked)
 			CACHE_WUNLOCK();
 		else
 			CACHE_RUNLOCK();
 		/*
 		 * When we lookup "." we still can be asked to lock it
 		 * differently...
 		 */
 		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
 		if (ltype != VOP_ISLOCKED(*vpp)) {
 			if (ltype == LK_EXCLUSIVE) {
 				vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
 				if ((*vpp)->v_iflag & VI_DOOMED) {
 					/* forced unmount */
 					vrele(*vpp);
 					*vpp = NULL;
 					return (ENOENT);
 				}
 			} else
 				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
 		}
 		return (-1);
 	}
 	ltype = 0;	/* silence gcc warning */
 	/*
 	 * For "..", dvp is unlocked around vget() and relocked afterwards
 	 * with the lock type it had before.
 	 */
 	if (cnp->cn_flags & ISDOTDOT) {
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 	}
 	/*
 	 * Take a hold on the vnode before the cache lock is dropped; vget()
 	 * is told about it through LK_VNHELD.
 	 */
 	vhold(*vpp);
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
 		CACHE_RUNLOCK();
 	error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
 			if (error == 0)
 				vput(*vpp);
 			*vpp = NULL;
 			return (ENOENT);
 		}
 	}
 	if (error) {
 		*vpp = NULL;
 		goto retry;
 	}
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
 		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
 	}
 	return (-1);
 
 unlock:
 	/* Miss: drop whichever flavour of the cache lock is held. */
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
 		CACHE_RUNLOCK();
 	return (0);
 }
 
 /*
  * Add an entry to the cache.
  */
 void
 cache_enter_time(dvp, vp, cnp, tsp, dtsp)
 	struct vnode *dvp;
 	struct vnode *vp;
 	struct componentname *cnp;
 	struct timespec *tsp;
 	struct timespec *dtsp;
 {
 	struct namecache *ncp, *n2;
 	struct namecache_ts *n3;
 	struct nchashhead *ncpp;
 	uint32_t hash;
 	int flag;
 	int hold;
 	int zap;
 	int len;
 
 	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
 	VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
 	    ("cache_enter: Adding a doomed vnode"));
 	VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp,
 	    ("cache_enter: Doomed vnode used as src"));
 
 	if (!doingcache)
 		return;
 
 	/*
 	 * Avoid blowout in namecache entries.
 	 */
 	if (numcache >= desiredvnodes * ncsizefactor)
 		return;
 
 	flag = 0;
 	if (cnp->cn_nameptr[0] == '.') {
 		/* "." is never entered into the cache. */
 		if (cnp->cn_namelen == 1)
 			return;
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			CACHE_WLOCK();
 			/*
 			 * If dotdot entry already exists, just retarget it
 			 * to new parent vnode, otherwise continue with new
 			 * namecache entry allocation.
 			 */
 			if ((ncp = dvp->v_cache_dd) != NULL &&
 			    ncp->nc_flag & NCF_ISDOTDOT) {
 				KASSERT(ncp->nc_dvp == dvp,
 				    ("wrong isdotdot parent"));
 				/* Unlink from the old target's list. */
 				if (ncp->nc_vp != NULL) {
 					TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
 					    ncp, nc_dst);
 				} else {
 					TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 					numneg--;
 				}
 				/* Link onto the new target's list. */
 				if (vp != NULL) {
 					TAILQ_INSERT_HEAD(&vp->v_cache_dst,
 					    ncp, nc_dst);
 				} else {
 					TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 					numneg++;
 				}
 				ncp->nc_vp = vp;
 				CACHE_WUNLOCK();
 				return;
 			}
 			dvp->v_cache_dd = NULL;
 			SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
 			CACHE_WUNLOCK();
 			flag = NCF_ISDOTDOT;
 		}
 	}
 
 	hold = 0;
 	zap = 0;
 
 	/*
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
 	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	ncp->nc_vp = vp;
 	ncp->nc_dvp = dvp;
 	ncp->nc_flag = flag;
 	if (tsp != NULL) {
 		n3 = (struct namecache_ts *)ncp;
 		n3->nc_time = *tsp;
 		n3->nc_ticks = ticks;
 		n3->nc_flag |= NCF_TS;
 		if (dtsp != NULL) {
 			n3->nc_dotdottime = *dtsp;
 			n3->nc_flag |= NCF_DTS;
 		}
 	}
 	len = ncp->nc_nlen = cnp->cn_namelen;
 	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
 	/* nc_flag is final by now, so nc_get_name() picks the right layout. */
 	strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1);
 	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	CACHE_WLOCK();
 
 	/*
 	 * See if this vnode or negative entry is already in the cache
 	 * with this name.  This can happen with concurrent lookups of
 	 * the same path name.
 	 */
 	ncpp = NCHHASH(hash);
 	LIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) {
 			/*
 			 * Already cached: refresh the timestamps of the
 			 * existing entry and discard the new one.
 			 */
 			if (tsp != NULL) {
 				KASSERT((n2->nc_flag & NCF_TS) != 0,
 				    ("no NCF_TS"));
 				n3 = (struct namecache_ts *)n2;
 				n3->nc_time =
 				    ((struct namecache_ts *)ncp)->nc_time;
 				n3->nc_ticks =
 				    ((struct namecache_ts *)ncp)->nc_ticks;
 				if (dtsp != NULL) {
 					n3->nc_dotdottime =
 					    ((struct namecache_ts *)ncp)->
 					    nc_dotdottime;
 					n3->nc_flag |= NCF_DTS;
 				}
 			}
 			CACHE_WUNLOCK();
 			cache_free(ncp);
 			return;
 		}
 	}
 
 	if (flag == NCF_ISDOTDOT) {
 		/*
 		 * See if we are trying to add .. entry, but some other lookup
 		 * has populated v_cache_dd pointer already.
 		 */
 		if (dvp->v_cache_dd != NULL) {
 		    CACHE_WUNLOCK();
 		    cache_free(ncp);
 		    return;
 		}
 		KASSERT(vp == NULL || vp->v_type == VDIR,
 		    ("wrong vnode type %p", vp));
 		dvp->v_cache_dd = ncp;
 	}
 
 	numcache++;
 	if (!vp) {
 		numneg++;
 		if (cnp->cn_flags & ISWHITEOUT)
 			ncp->nc_flag |= NCF_WHITE;
 	} else if (vp->v_type == VDIR) {
 		if (flag != NCF_ISDOTDOT) {
 			/*
 			 * For this case, the cache entry maps both the
 			 * directory name in it and the name ".." for the
 			 * directory's parent.
 			 */
 			if ((n2 = vp->v_cache_dd) != NULL &&
 			    (n2->nc_flag & NCF_ISDOTDOT) != 0)
 				cache_zap(n2);
 			vp->v_cache_dd = ncp;
 		}
 	} else {
 		vp->v_cache_dd = NULL;
 	}
 
 	/*
 	 * Insert the new namecache entry into the appropriate chain
 	 * within the cache entries table.
 	 */
 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 	if (flag != NCF_ISDOTDOT) {
 		/*
 		 * First source entry for dvp: remember to hold it below.
 		 * cache_zap() drops the hold when the list empties again.
 		 */
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			hold = 1;
 			numcachehv++;
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
 
 	/*
 	 * If the entry is "negative", we place it into the
 	 * "negative" cache queue, otherwise, we place it into the
 	 * destination vnode's cache entries queue.
 	 */
 	if (vp) {
 		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
 		SDT_PROBE3(vfs, namecache, enter, done, dvp, nc_get_name(ncp),
 		    vp);
 	} else {
 		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    nc_get_name(ncp));
 	}
 	/*
 	 * Too many negative entries relative to the cache size: pick the
 	 * one at the head of ncneg for eviction.  Note that ncp is reused
 	 * here and no longer refers to the entry just inserted.
 	 */
 	if (numneg * ncnegfactor > numcache) {
 		ncp = TAILQ_FIRST(&ncneg);
 		KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg",
 		    ncp, ncp->nc_vp));
 		zap = 1;
 	}
 	if (hold)
 		vhold(dvp);
 	if (zap)
 		cache_zap(ncp);
 	CACHE_WUNLOCK();
 }
 
 /*
  * Name cache initialization, from vfs_init() when we are booting
  */
 /*
  * Set up the negative entry queue, the four UMA zones (small/large name,
  * with and without timestamps) and the hash table sized from
  * desiredvnodes.
  */
 static void
 nchinit(void *dummy __unused)
 {
 	size_t small_name, large_name;
 
 	/* Room for the longest name of each class plus the nul. */
 	small_name = CACHE_PATH_CUTOFF + 1;
 	large_name = NAME_MAX + 1;
 
 	TAILQ_INIT(&ncneg);
 
 	cache_zone_small = uma_zcreate("S VFS Cache",
 	    sizeof(struct namecache) + small_name,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cache_zone_small_ts = uma_zcreate("STS VFS Cache",
 	    sizeof(struct namecache_ts) + small_name,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cache_zone_large = uma_zcreate("L VFS Cache",
 	    sizeof(struct namecache) + large_name,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
 	    sizeof(struct namecache_ts) + large_name,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
 
 void
 cache_changesize(int newmaxvnodes)
 {
 	struct nchashhead *new_nchashtbl, *old_nchashtbl;
 	u_long new_nchash, old_nchash;
 	struct namecache *ncp;
 	uint32_t hash;
 	int i;
 
 	new_nchashtbl = hashinit(newmaxvnodes * 2, M_VFSCACHE, &new_nchash);
 	/* If same hash table size, nothing to do */
 	if (nchash == new_nchash) {
 		free(new_nchashtbl, M_VFSCACHE);
 		return;
 	}
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
 	CACHE_WLOCK();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
 	nchashtbl = new_nchashtbl;
 	nchash = new_nchash;
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = fnv_32_buf(nc_get_name(ncp), ncp->nc_nlen,
 			    FNV1_32_INIT);
 			hash = fnv_32_buf(&ncp->nc_dvp, sizeof(ncp->nc_dvp),
 			    hash);
 			LIST_REMOVE(ncp, nc_hash);
 			LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
 		}
 	}
 	CACHE_WUNLOCK();
 	free(old_nchashtbl, M_VFSCACHE);
 }
 
 /*
  * Invalidate all entries to a particular vnode.
  *
  * Every entry for which vp is the parent (v_cache_src), every entry that
  * names vp (v_cache_dst) and vp's ".." entry, if any, is zapped while the
  * cache write lock is held.
  */
 void
 cache_purge(struct vnode *vp)
 {
 
 	CTR1(KTR_VFS, "cache_purge(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge, done, vp);
 	CACHE_WLOCK();
 	/* cache_zap() unlinks the entry, so keep taking the list head. */
 	while (!LIST_EMPTY(&vp->v_cache_src))
 		cache_zap(LIST_FIRST(&vp->v_cache_src));
 	while (!TAILQ_EMPTY(&vp->v_cache_dst))
 		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
 	/* Whatever is left in v_cache_dd must be a dedicated ".." entry. */
 	if (vp->v_cache_dd != NULL) {
 		KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
 		   ("lost dotdot link"));
 		cache_zap(vp->v_cache_dd);
 	}
 	KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
 	CACHE_WUNLOCK();
 }
 
 /*
  * Invalidate all negative entries for a particular directory vnode.
  *
  * Walks the entries that have vp as their parent and zaps those without a
  * target vnode, under the cache write lock.
  */
 void
 cache_purge_negative(struct vnode *vp)
 {
 	struct namecache *cp, *ncp;
 
 	CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge_negative, done, vp);
 	CACHE_WLOCK();
 	/* The _SAFE variant is needed because cache_zap() frees cp. */
 	LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
 		if (cp->nc_vp == NULL)
 			cache_zap(cp);
 	}
 	CACHE_WUNLOCK();
 }
 
 /*
  * Flush all entries referencing a particular filesystem.
  */
 void
 cache_purgevfs(mp)
 	struct mount *mp;
 {
 	struct nchashhead *ncpp;
 	struct namecache *ncp, *nnp;
 
 	/* Scan hash tables for applicable entries */
 	SDT_PROBE1(vfs, namecache, purgevfs, done, mp);
 	CACHE_WLOCK();
 	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
 		LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
 			if (ncp->nc_dvp->v_mount == mp)
 				cache_zap(ncp);
 		}
 	}
 	CACHE_WUNLOCK();
 }
 
 /*
  * Perform canonical checks and cache lookup and pass on to filesystem
  * through the vop_cachedlookup only if needed.
  *
  * The fields of *ap used here are:
  *	struct vnode *a_dvp;
  *	struct vnode **a_vpp;
  *	struct componentname *a_cnp;
  *
  * Returns 0 on a positive cache hit (with *a_vpp set by cache_lookup()),
  * the result of VOP_CACHEDLOOKUP() on a cache miss, or an errno value from
  * the preliminary checks or from cache_lookup().
  */
 
 int
 vfs_cache_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp;
 	int error;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	struct thread *td = cnp->cn_thread;
 
 	*vpp = NULL;
 	dvp = ap->a_dvp;
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	/* Refuse to delete or rename the last component on a read-only mount. */
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 
 	error = VOP_ACCESS(dvp, VEXEC, cred, td);
 	if (error)
 		return (error);
 
 	/* cache_lookup(): 0 is a miss, -1 a positive hit, else an errno. */
 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 	if (error == 0)
 		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 	if (error == -1)
 		return (0);
 	return (error);
 }
 
 /*
  * XXX All of these sysctls would probably be more productive dead.
  */
 static int disablecwd;
 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
    "Disable the getcwd syscall");
 
 /*
  * Implementation of the getcwd syscall: hands the user buffer and its
  * length to kern___getcwd(), with the path length capped at MAXPATHLEN.
  */
 int
 sys___getcwd(struct thread *td, struct __getcwd_args *uap)
 {
 
 	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen,
 	    MAXPATHLEN));
 }
 
 /*
  * Resolve the current working directory of td's process into buf.
  *
  * bufseg selects how the result is delivered: bcopy() for UIO_SYSSPACE,
  * copyout() otherwise.  buflen is clamped to path_max; a length below 2
  * is rejected with EINVAL.  Returns ENODEV when debug.disablecwd is set,
  * otherwise the error from vn_fullpath1() or copyout().
  */
 int
 kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, u_int buflen,
     u_int path_max)
 {
 	struct filedesc *fdp;
 	struct vnode *cdir, *rdir;
 	char *path, *scratch;
 	int error;
 
 	if (disablecwd)
 		return (ENODEV);
 	if (buflen < 2)
 		return (EINVAL);
 	buflen = (buflen > path_max) ? path_max : buflen;
 
 	scratch = malloc(buflen, M_TEMP, M_WAITOK);
 
 	/* Reference the current and root directories under the fd lock. */
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	cdir = fdp->fd_cdir;
 	VREF(cdir);
 	rdir = fdp->fd_rdir;
 	VREF(rdir);
 	FILEDESC_SUNLOCK(fdp);
 
 	/* On success path points somewhere inside scratch. */
 	error = vn_fullpath1(td, cdir, rdir, scratch, &path, buflen);
 	vrele(rdir);
 	vrele(cdir);
 
 	if (error == 0) {
 		if (bufseg == UIO_SYSSPACE)
 			bcopy(path, buf, strlen(path) + 1);
 		else
 			error = copyout(path, buf, strlen(path) + 1);
 #ifdef KTRACE
 		if (KTRPOINT(curthread, KTR_NAMEI))
 			ktrnamei(path);
 #endif
 	}
 	free(scratch, M_TEMP);
 	return (error);
 }
 
 /*
  * Thus begins the fullpath magic.
  */
 
 /*
  * Redefine STATNODE for the fullpath counters: each use declares a static
  * u_int and publishes it read-only under the vfs.cache sysctl node.
  */
 #undef STATNODE
 #define STATNODE(name, descr)						\
 	static u_int name;						\
 	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr)
 
 /* debug.disablefullpath: non-zero makes the vn_fullpath*() calls fail. */
 static int disablefullpath;
 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
     "Disable the vn_fullpath function");
 
 /* These count for kern___getcwd(), too. */
 STATNODE(numfullpathcalls, "Number of fullpath search calls");
 STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
 STATNODE(numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE(numfullpathfound, "Number of successful fullpath calls");
 
 /*
  * Retrieve the full filesystem path that correspond to a vnode from the name
  * cache (if available)
  */
 int
 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
 {
 	char *buf;
 	struct filedesc *fdp;
 	struct vnode *rdir;
 	int error;
 
 	if (disablefullpath)
 		return (ENODEV);
 	if (vn == NULL)
 		return (EINVAL);
 
 	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	rdir = fdp->fd_rdir;
 	VREF(rdir);
 	FILEDESC_SUNLOCK(fdp);
 	error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
 	vrele(rdir);
 
 	if (!error)
 		*freebuf = buf;
 	else
 		free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * This function is similar to vn_fullpath, but it attempts to lookup the
  * pathname relative to the global root mount point.  This is required for the
  * auditing sub-system, as audited pathnames must be absolute, relative to the
  * global root mount point.
  */
 int
 vn_fullpath_global(struct thread *td, struct vnode *vn,
     char **retbuf, char **freebuf)
 {
 	char *buf;
 	int error;
 
 	if (disablefullpath)
 		return (ENODEV);
 	if (vn == NULL)
 		return (EINVAL);
 	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 	error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
 	if (!error)
 		*freebuf = buf;
 	else
 		free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Unlocked front end for vn_vptocnp_locked(): takes the name cache read
  * lock, performs one vnode-to-parent step, and leaves with the lock
  * released in every case.
  */
 int
 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
 {
 	int error;
 
 	CACHE_RLOCK();
 	error = vn_vptocnp_locked(vp, cred, buf, buflen);
 	if (error != 0) {
 		/* The helper has already dropped the lock on failure. */
 		return (error);
 	}
 	CACHE_RUNLOCK();
 	return (error);
 }
 
 /*
  * Perform one step of the reverse path walk: find the name of *vp in its
  * parent directory, prepend it to the text accumulated at the tail of buf,
  * and replace *vp with the parent.
  *
  * Locking and references, as visible in the body:
  *  - entered with the name cache read lock held;
  *  - returns 0 with the read lock held and a reference on the new *vp;
  *  - returns an error with the read lock released and the reference that
  *    was held on the vnode dropped.
  *
  * buf is filled from the end towards the start; *buflen is the number of
  * bytes still free in front of the text and is decreased by the length of
  * the name that was added.
  */
 static int
 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
     u_int *buflen)
 {
 	struct vnode *dvp;
 	struct namecache *ncp;
 	int error;
 
 	/* Look for a cache entry naming *vp that is not a ".." entry. */
 	TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	}
 	if (ncp != NULL) {
 		/* Cache hit: make sure the name fits in the remaining space. */
 		if (*buflen < ncp->nc_nlen) {
 			CACHE_RUNLOCK();
 			vrele(*vp);
 			numfullpathfail4++;
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    vp, NULL);
 			return (error);
 		}
 		*buflen -= ncp->nc_nlen;
 		memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen);
 		SDT_PROBE3(vfs, namecache, fullpath, hit, ncp->nc_dvp,
 		    nc_get_name(ncp), vp);
 		/*
 		 * Move up to the parent: reference it while the lock is still
 		 * held, then drop the lock around the vrele() of the child.
 		 */
 		dvp = *vp;
 		*vp = ncp->nc_dvp;
 		vref(*vp);
 		CACHE_RUNLOCK();
 		vrele(dvp);
 		CACHE_RLOCK();
 		return (0);
 	}
 	SDT_PROBE1(vfs, namecache, fullpath, miss, vp);
 
 	/*
 	 * Cache miss: ask the filesystem.  The cache lock is dropped before
 	 * the vnode lock is taken; vput() then unlocks *vp and releases the
 	 * reference on it.
 	 */
 	CACHE_RUNLOCK();
 	vn_lock(*vp, LK_SHARED | LK_RETRY);
 	error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
 	vput(*vp);
 	if (error) {
 		numfullpathfail2++;
 		SDT_PROBE3(vfs, namecache, fullpath, return,  error, vp, NULL);
 		return (error);
 	}
 
 	*vp = dvp;
 	CACHE_RLOCK();
 	if (dvp->v_iflag & VI_DOOMED) {
 		/* forced unmount */
 		CACHE_RUNLOCK();
 		vrele(dvp);
 		error = ENOENT;
 		SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
 		return (error);
 	}
 	/*
 	 * *vp has its use count incremented still.
 	 */
 
 	return (0);
 }
 
 /*
  * The magic behind kern___getcwd() and vn_fullpath().
  *
  * Walks from vp up towards rdir (or rootvnode), prepending one component
  * per step, so the path is assembled backwards from the end of buf.  On
  * success *retbuf points at the first character of the path inside buf
  * and 0 is returned.  buflen is the size of buf; the last byte is used
  * for the terminating NUL.
  *
  * The function takes its own reference on vp and releases whatever vnode
  * it is holding before it returns; the name cache read lock is not held
  * on return.
  */
 static int
 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen)
 {
 	int error, slash_prefixed;
 #ifdef KDTRACE_HOOKS
 	struct vnode *startvp = vp;
 #endif
 	struct vnode *vp1;
 
 	/* Terminate the string first; everything else goes in front of it. */
 	buflen--;
 	buf[buflen] = '\0';
 	error = 0;
 	slash_prefixed = 0;
 
 	SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
 	numfullpathcalls++;
 	vref(vp);
 	CACHE_RLOCK();
 	if (vp->v_type != VDIR) {
 		/*
 		 * Not a directory: resolve its own name first.  On error the
 		 * helper has already dropped the lock and the reference.
 		 */
 		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			return (error);
 		if (buflen == 0) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = 1;
 	}
 	while (vp != rdir && vp != rootvnode) {
 		if (vp->v_vflag & VV_ROOT) {
 			/* Root of a mounted filesystem: hop to the covered vnode. */
 			if (vp->v_iflag & VI_DOOMED) {	/* forced unmount */
 				CACHE_RUNLOCK();
 				vrele(vp);
 				error = ENOENT;
 				SDT_PROBE3(vfs, namecache, fullpath, return,
 				    error, vp, NULL);
 				break;
 			}
 			vp1 = vp->v_mount->mnt_vnodecovered;
 			vref(vp1);
 			CACHE_RUNLOCK();
 			vrele(vp);
 			vp = vp1;
 			CACHE_RLOCK();
 			continue;
 		}
 		if (vp->v_type != VDIR) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			numfullpathfail1++;
 			error = ENOTDIR;
 			SDT_PROBE3(vfs, namecache, fullpath, return,
 			    error, vp, NULL);
 			break;
 		}
 		/* On error the helper has already unlocked and released vp. */
 		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			break;
 		if (buflen == 0) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    startvp, NULL);
 			break;
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = 1;
 	}
 	/* Every failing exit from the loop has already cleaned up. */
 	if (error)
 		return (error);
 	if (!slash_prefixed) {
 		/* No component was added: the result is just "/". */
 		if (buflen == 0) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			numfullpathfail4++;
 			SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM,
 			    startvp, NULL);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 	}
 	numfullpathfound++;
 	CACHE_RUNLOCK();
 	vrele(vp);
 
 	SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
 	*retbuf = buf + buflen;
 	return (0);
 }
 
 struct vnode *
 vn_dir_dd_ino(struct vnode *vp)
 {
 	struct namecache *ncp;
 	struct vnode *ddvp;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
 	CACHE_RLOCK();
 	TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
 		vhold(ddvp);
 		CACHE_RUNLOCK();
 		if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread))
 			return (NULL);
 		return (ddvp);
 	}
 	CACHE_RUNLOCK();
 	return (NULL);
 }
 
 /*
  * Copy the cached name of vp (the first name cache entry that is not a
  * ".." entry) into buf as a NUL-terminated string, truncating it to
  * buflen - 1 characters when necessary.
  *
  * Returns 0 on success, ENOENT when no such entry exists, and EINVAL when
  * buflen is 0.
  */
 int
 vn_commname(struct vnode *vp, char *buf, u_int buflen)
 {
 	struct namecache *ncp;
 	int l;
 
 	/*
 	 * A zero-length buffer cannot even hold the terminator, and the
 	 * unsigned "buflen - 1" below would wrap around and stop bounding
 	 * the copy.
 	 */
 	if (buflen == 0)
 		return (EINVAL);
 
 	CACHE_RLOCK();
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	if (ncp == NULL) {
 		CACHE_RUNLOCK();
 		return (ENOENT);
 	}
 	l = min(ncp->nc_nlen, buflen - 1);
 	memcpy(buf, nc_get_name(ncp), l);
 	CACHE_RUNLOCK();
 	buf[l] = '\0';
 	return (0);
 }
 
 /*
  * ABI compat shims for old kernel modules.
  *
  * Any macro named cache_enter is removed first so that a real function
  * symbol with that name can be defined; the function forwards to
  * cache_enter_time() with both trailing arguments set to NULL.
  */
 #undef cache_enter
 
 void	cache_enter(struct vnode *dvp, struct vnode *vp,
 	    struct componentname *cnp);
 
 void
 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
 
 	cache_enter_time(dvp, vp, cnp, NULL, NULL);
 }
 
 /*
  * This function updates the path string to the vnode's full global path
  * and checks the size of the new path string against the pathlen argument.
  *
  * Requires a locked, referenced vnode.
  * Vnode is re-locked on success or ENODEV, otherwise unlocked.
  *
  * If sysctl debug.disablefullpath is set, ENODEV is returned,
  * the vnode is left locked and path remains untouched.
  *
  * If vp is a directory, the call to vn_fullpath_global() always succeeds
  * because it falls back to the ".." lookup if the namecache lookup fails.
  *
  * Other return values visible below: ENAMETOOLONG when the global path
  * does not fit in pathlen bytes, ENOENT when the re-lookup of path yields
  * a different vnode, or the error from vn_fullpath_global() / namei().
  */
 int
 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path,
     u_int pathlen)
 {
 	struct nameidata nd;
 	struct vnode *vp1;
 	char *rpath, *fbuf;
 	int error;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 
 	/* Return ENODEV if sysctl debug.disablefullpath==1 */
 	if (disablefullpath)
 		return (ENODEV);
 
 	/* Construct global filesystem path from vp. */
 	VOP_UNLOCK(vp, 0);
 	error = vn_fullpath_global(td, vp, &rpath, &fbuf);
 
 	if (error != 0) {
 		/* fbuf was not handed out on failure, so there is nothing to free. */
 		vrele(vp);
 		return (error);
 	}
 
 	/* The path plus its terminating NUL must fit in pathlen bytes. */
 	if (strlen(rpath) >= pathlen) {
 		vrele(vp);
 		error = ENAMETOOLONG;
 		goto out;
 	}
 
 	/*
 	 * Re-lookup the vnode by path to detect a possible rename.
 	 * As a side effect, the vnode is relocked.
 	 * If vnode was renamed, return ENOENT.
 	 */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 	    UIO_SYSSPACE, path, td);
 	error = namei(&nd);
 	if (error != 0) {
 		vrele(vp);
 		goto out;
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp1 = nd.ni_vp;
 	/*
 	 * Drop the caller's original reference; vp is only compared by
 	 * address from here on.
 	 */
 	vrele(vp);
 	if (vp1 == vp)
 		strcpy(path, rpath);
 	else {
 		vput(vp1);
 		error = ENOENT;
 	}
 
 out:
 	free(fbuf, M_TEMP);
 	return (error);
 }
Index: head/sys/netinet/in_kdtrace.c
===================================================================
--- head/sys/netinet/in_kdtrace.c	(revision 292383)
+++ head/sys/netinet/in_kdtrace.c	(revision 292384)
@@ -1,147 +1,147 @@
 /*-
  * Copyright (c) 2013 Mark Johnston <markj@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the
  *    distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sdt.h>
 
 /* DTrace providers defined by this file. */
 SDT_PROVIDER_DEFINE(ip);
 SDT_PROVIDER_DEFINE(tcp);
 SDT_PROVIDER_DEFINE(udp);
 
 /*
  * ip provider probes.  Each argument is described by a pair of type
  * strings.
  * NOTE(review): presumably the first string is the native kernel type and
  * the second the type presented to D scripts after translation -- confirm
  * against the SDT documentation.
  */
 SDT_PROBE_DEFINE6_XLATE(ip, , , receive,
     "void *", "pktinfo_t *",
     "void *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct ifnet *", "ifinfo_t *",
     "struct ip *", "ipv4info_t *",
     "struct ip6_hdr *", "ipv6info_t *");
 
 SDT_PROBE_DEFINE6_XLATE(ip, , , send,
     "void *", "pktinfo_t *",
     "void *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct ifnet *", "ifinfo_t *",
     "struct ip *", "ipv4info_t *",
     "struct ip6_hdr *", "ipv6info_t *");
 
 /*
  * tcp provider probes: connection events, packet receive/send, the siftr
  * hook, the debug__* probes and state__change.
  */
 SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__established,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__refused,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__established,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__refused,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__request,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , receive,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , send,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *");
 
 SDT_PROBE_DEFINE1_XLATE(tcp, , , siftr,
     "struct pkt_node *", "siftrinfo_t *");
 
 SDT_PROBE_DEFINE3_XLATE(tcp, , , debug__input,
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *",
     "uint8_t *", "ipinfo_t *");
 
 SDT_PROBE_DEFINE3_XLATE(tcp, , , debug__output,
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *",
     "uint8_t *", "ipinfo_t *");
 
 SDT_PROBE_DEFINE2_XLATE(tcp, , , debug__user,
     "struct tcpcb *", "tcpsinfo_t *" ,
     "int", "int");
 
 SDT_PROBE_DEFINE3_XLATE(tcp, , , debug__drop,
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *",
-    "uint8_t *", "ipinfo_t *")
+    "uint8_t *", "ipinfo_t *");
 
 SDT_PROBE_DEFINE6_XLATE(tcp, , , state__change,
     "void *", "void *",
     "struct tcpcb *", "csinfo_t *",
     "void *", "void *",
     "struct tcpcb *", "tcpsinfo_t *",
     "void *", "void *",
     "int", "tcplsinfo_t *");
 
 /* udp provider probes: one for received and one for sent datagrams. */
 SDT_PROBE_DEFINE5_XLATE(udp, , , receive,
     "void *", "pktinfo_t *",
     "struct inpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct inpcb *", "udpsinfo_t *",
     "struct udphdr *", "udpinfo_t *");
 
 SDT_PROBE_DEFINE5_XLATE(udp, , , send,
     "void *", "pktinfo_t *",
     "struct inpcb *", "csinfo_t *",
     "uint8_t *", "ipinfo_t *",
     "struct inpcb *", "udpsinfo_t *",
     "struct udphdr *", "udpinfo_t *");
Index: head/sys/netinet/in_kdtrace.h
===================================================================
--- head/sys/netinet/in_kdtrace.h	(revision 292383)
+++ head/sys/netinet/in_kdtrace.h	(revision 292384)
@@ -1,72 +1,72 @@
 /*-
  * Copyright (c) 2013 Mark Johnston <markj@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the
  *    distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_IN_KDTRACE_H_
 #define	_SYS_IN_KDTRACE_H_
 
 /*
  * Convenience wrappers that fire a probe of the ip, udp or tcp provider.
  * The module and function fields of the probe name are left empty; the
  * digit in TCP_PROBEn is the number of probe arguments.
  */
 #define	IP_PROBE(probe, arg0, arg1, arg2, arg3, arg4, arg5)		\
 	SDT_PROBE6(ip, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
 #define	UDP_PROBE(probe, arg0, arg1, arg2, arg3, arg4)			\
 	SDT_PROBE5(udp, , , probe, arg0, arg1, arg2, arg3, arg4)
-#define	TCP_PROBE1(probe, arg0) \
+#define	TCP_PROBE1(probe, arg0)						\
 	SDT_PROBE1(tcp, , , probe, arg0)
-#define	TCP_PROBE2(probe, arg0, arg1)		\
+#define	TCP_PROBE2(probe, arg0, arg1)					\
 	SDT_PROBE2(tcp, , , probe, arg0, arg1)
-#define	TCP_PROBE3(probe, arg0, arg1, arg2)		\
+#define	TCP_PROBE3(probe, arg0, arg1, arg2)				\
 	SDT_PROBE3(tcp, , , probe, arg0, arg1, arg2)
-#define	TCP_PROBE4(probe, arg0, arg1, arg2, arg3)	\
+#define	TCP_PROBE4(probe, arg0, arg1, arg2, arg3)			\
 	SDT_PROBE4(tcp, , , probe, arg0, arg1, arg2, arg3)
 #define	TCP_PROBE5(probe, arg0, arg1, arg2, arg3, arg4)			\
 	SDT_PROBE5(tcp, , , probe, arg0, arg1, arg2, arg3, arg4)
 #define	TCP_PROBE6(probe, arg0, arg1, arg2, arg3, arg4, arg5)		\
 	SDT_PROBE6(tcp, , , probe, arg0, arg1, arg2, arg3, arg4, arg5)
 
 /* Declarations of the ip, tcp and udp providers for files using the macros above. */
 SDT_PROVIDER_DECLARE(ip);
 SDT_PROVIDER_DECLARE(tcp);
 SDT_PROVIDER_DECLARE(udp);
 
 /* ip provider probes. */
 SDT_PROBE_DECLARE(ip, , , receive);
 SDT_PROBE_DECLARE(ip, , , send);
 
 /* tcp provider probes. */
 SDT_PROBE_DECLARE(tcp, , , accept__established);
 SDT_PROBE_DECLARE(tcp, , , accept__refused);
 SDT_PROBE_DECLARE(tcp, , , connect__established);
 SDT_PROBE_DECLARE(tcp, , , connect__refused);
 SDT_PROBE_DECLARE(tcp, , , connect__request);
 SDT_PROBE_DECLARE(tcp, , , receive);
 SDT_PROBE_DECLARE(tcp, , , send);
 SDT_PROBE_DECLARE(tcp, , , siftr);
 SDT_PROBE_DECLARE(tcp, , , state__change);
 SDT_PROBE_DECLARE(tcp, , , debug__input);
 SDT_PROBE_DECLARE(tcp, , , debug__output);
 SDT_PROBE_DECLARE(tcp, , , debug__user);
 SDT_PROBE_DECLARE(tcp, , , debug__drop);
 
 /* udp provider probes. */
 SDT_PROBE_DECLARE(udp, , , receive);
 SDT_PROBE_DECLARE(udp, , , send);
 
 #endif
Index: head/sys/netinet/sctp_cc_functions.c
===================================================================
--- head/sys/netinet/sctp_cc_functions.c	(revision 292383)
+++ head/sys/netinet/sctp_cc_functions.c	(revision 292384)
@@ -1,2373 +1,2373 @@
 /*-
  * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_input.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_dtrace_declare.h>
 
 /*
  * Bit-shift amounts.
  * NOTE(review): presumably fixed-point scaling factors for an MPTCP-style
  * congestion control calculation elsewhere in this file -- confirm at the
  * point of use.
  */
 #define SHIFT_MPTCP_MULTI_N 40
 #define SHIFT_MPTCP_MULTI_Z 16
 #define SHIFT_MPTCP_MULTI 8
 
 /*
  * Apply the association-wide cwnd cap to one destination.
  *
  * Nothing happens unless a cap is configured (max_cwnd > 0) and the
  * current cwnd is above both the cap and (mtu - SCTP common header).
  * Otherwise cwnd is set to the cap, but never below
  * (mtu - SCTP common header).
  */
 static void
 sctp_enforce_cwnd_limit(struct sctp_association *assoc, struct sctp_nets *net)
 {
 	if (assoc->max_cwnd <= 0)
 		return;
 	if (net->cwnd <= assoc->max_cwnd)
 		return;
 	if (net->cwnd <= (net->mtu - sizeof(struct sctphdr)))
 		return;
 
 	net->cwnd = assoc->max_cwnd;
 	if (net->cwnd < (net->mtu - sizeof(struct sctphdr)))
 		net->cwnd = net->mtu - sizeof(struct sctphdr);
 }
 
 /*
  * Set the initial congestion window and slow-start threshold of one
  * destination (net) of the association in stcb.
  *
  * The window comes either from the RFC 4960 formula or from the
  * sctp_initial_cwnd sysctl (limited by max_burst), is divided among the
  * destinations when resource pooling (CMT RPV1/RPV2) is active, and is
  * finally subjected to the association's cwnd cap.  ssthresh starts at
  * the peer's receive window.
  */
 static void
 sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *assoc;
 	uint32_t cwnd_in_mtu;
 
 	assoc = &stcb->asoc;
 	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
 	if (cwnd_in_mtu == 0) {
 		/* Using 0 means that the value of RFC 4960 is used. */
 		net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
 	} else {
 		/*
 		 * We take the minimum of the burst limit and the initial
 		 * congestion window.
 		 */
 		if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
 			cwnd_in_mtu = assoc->max_burst;
 		net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
 	}
 	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
 	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 		/* In case of resource pooling initialize appropriately */
 		net->cwnd /= assoc->numnets;
 		/* Never start below one MTU's worth of payload. */
 		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
 			net->cwnd = net->mtu - sizeof(struct sctphdr);
 		}
 	}
 	sctp_enforce_cwnd_limit(assoc, net);
 	net->ssthresh = assoc->peers_rwnd;
 	/* Probe arguments: vtag, (local port << 16 | remote port), net, old cwnd (0), new cwnd. */
-	SDT_PROBE(sctp, cwnd, net, init,
+	SDT_PROBE5(sctp, cwnd, net, init,
 	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
 	    0, net->cwnd);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
 	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
 		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
 	}
 }
 
 /*
  * Congestion window update after a fast retransmit.
  *
  * For every destination that had data acknowledged (net_ack > 0), and
  * provided the association is not already in fast recovery (or CMT is
  * enabled), ssthresh is reduced, cwnd is set to the new ssthresh, fast
  * recovery is entered for the association and for the destination, and
  * the send timer is restarted.  With resource pooling (CMT RPV1/RPV2)
  * the new ssthresh is derived from totals over all destinations;
  * otherwise it is half the current cwnd, with a floor of two MTUs.
  */
 static void
 sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_nets *net;
 	uint32_t t_ssthresh, t_cwnd;
 	uint64_t t_ucwnd_sbw;
 
 	/* MT FIXME: Don't compute this over and over again */
 	t_ssthresh = 0;
 	t_cwnd = 0;
 	t_ucwnd_sbw = 0;
 	if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
 	    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 		/* Totals over all destinations, needed by the pooled formulas. */
 		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 			t_ssthresh += net->ssthresh;
 			t_cwnd += net->cwnd;
 			if (net->lastsa > 0) {
 				t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) net->lastsa;
 			}
 		}
 		/* Avoid a zero divisor in the RPV2 formula below. */
 		if (t_ucwnd_sbw == 0) {
 			t_ucwnd_sbw = 1;
 		}
 	}
 	/*-
 	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
 	 * (net->fast_retran_loss_recovery == 0)))
 	 */
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		if ((asoc->fast_retran_loss_recovery == 0) ||
 		    (asoc->sctp_cmt_on_off > 0)) {
 			/* out of a RFC2582 Fast recovery window? */
 			if (net->net_ack > 0) {
 				/*
 				 * per section 7.2.3, are there any
 				 * destinations that had a fast retransmit
 				 * to them. If so what we need to do is
 				 * adjust ssthresh and cwnd.
 				 */
 				struct sctp_tmit_chunk *lchk;
 				int old_cwnd = net->cwnd;
 
 				if ((asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) ||
 				    (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV1) {
 						/* 4 * mtu, weighted by this path's share of the total ssthresh. */
 						net->ssthresh = (uint32_t) (((uint64_t) 4 *
 						    (uint64_t) net->mtu *
 						    (uint64_t) net->ssthresh) /
 						    (uint64_t) t_ssthresh);
 
 					}
 					if (asoc->sctp_cmt_on_off == SCTP_CMT_RPV2) {
 						uint32_t srtt;
 
 						srtt = net->lastsa;
 						/*
 						 * lastsa>>3;  we don't need
 						 * to divide ...
 						 */
 						if (srtt == 0) {
 							srtt = 1;
 						}
 						/*
 						 * Short Version => Equal to
 						 * Contel Version MBe
 						 */
 						net->ssthresh = (uint32_t) (((uint64_t) 4 *
 						    (uint64_t) net->mtu *
 						    (uint64_t) net->cwnd) /
 						    ((uint64_t) srtt *
 						    t_ucwnd_sbw));
 						 /* INCREASE FACTOR */ ;
 					}
 					/* Do not cut this path by more than half of the total cwnd. */
 					if ((net->cwnd > t_cwnd / 2) &&
 					    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
 						net->ssthresh = net->cwnd - t_cwnd / 2;
 					}
 					if (net->ssthresh < net->mtu) {
 						net->ssthresh = net->mtu;
 					}
 				} else {
 					/* Standard case: halve cwnd, floor of two MTUs. */
 					net->ssthresh = net->cwnd / 2;
 					if (net->ssthresh < (net->mtu * 2)) {
 						net->ssthresh = 2 * net->mtu;
 					}
 				}
 				net->cwnd = net->ssthresh;
 				sctp_enforce_cwnd_limit(asoc, net);
-				SDT_PROBE(sctp, cwnd, net, fr,
+				SDT_PROBE5(sctp, cwnd, net, fr,
 				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
 				    old_cwnd, net->cwnd);
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
 					    SCTP_CWND_LOG_FROM_FR);
 				}
 				/* First chunk still waiting on the send queue, if any. */
 				lchk = TAILQ_FIRST(&asoc->send_queue);
 
 				net->partial_bytes_acked = 0;
 				/* Turn on fast recovery window */
 				asoc->fast_retran_loss_recovery = 1;
 				if (lchk == NULL) {
 					/* Mark end of the window */
 					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
 				} else {
 					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
 				}
 
 				/*
 				 * CMT fast recovery -- per destination
 				 * recovery variable.
 				 */
 				net->fast_retran_loss_recovery = 1;
 
 				if (lchk == NULL) {
 					/* Mark end of the window */
 					net->fast_recovery_tsn = asoc->sending_seq - 1;
 				} else {
 					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
 				}
 
 				/* Restart the send timer for this destination. */
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net,
 				    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_1);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 				    stcb->sctp_ep, stcb, net);
 			}
 		} else if (net->net_ack > 0) {
 			/*
 			 * Mark a peg that we WOULD have done a cwnd
 			 * reduction but RFC2582 prevented this action.
 			 */
 			SCTP_STAT_INCR(sctps_fastretransinrtt);
 		}
 	}
 }
 
 /* Defines for instantaneous bw decisions */
 #define SCTP_INST_LOOSING 1	/* Losing to other flows */
 #define SCTP_INST_NEUTRAL 2	/* Neutral, no indication */
 #define SCTP_INST_GAINING 3	/* Gaining, step down possible */
 
 
 /*
  * Decision helper for the case where the measured bandwidth (nbw) is
  * considered unchanged.  The current RTT is compared with the RTT stored
  * alongside the last bandwidth sample (lbw_rtt), with rtt_offset as the
  * tolerance, and one of three branches is taken: RTT increased, RTT
  * decreased, or RTT unchanged.
  *
  * Each branch fires the rttvar probe with a "probepoint" value that packs
  * the current cwnd into the upper 32 bits and a branch number (5, 6 or 7)
  * shifted left by 16 plus a low flag into the lower bits.  When the
  * steady_step feature is enabled the function may also voluntarily reduce
  * cwnd by one MTU ("step down").
  *
  * Returns 0 or 1 (or ret_from_eq in the final branch).
  * NOTE(review): the meaning of the return value is defined by the caller,
  * which is outside this view -- confirm before relying on it.
  */
 static int
 cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
     uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind)
 {
 	uint64_t oth, probepoint;
 
 	probepoint = (((uint64_t) net->cwnd) << 32);
 	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
 		/*
 		 * rtt increased we don't update bw.. so we don't update the
 		 * rtt either.
 		 */
 		/* Probe point 5 */
 		probepoint |= ((5 << 16) | 1);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    net->flight_size,
 		    probepoint);
 		if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
 			/* Count consecutive visits to state 5. */
 			if (net->cc_mod.rtcc.last_step_state == 5)
 				net->cc_mod.rtcc.step_cnt++;
 			else
 				net->cc_mod.rtcc.step_cnt = 1;
 			net->cc_mod.rtcc.last_step_state = 5;
 			/* Act on every multiple of steady_step consecutive visits. */
 			if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
 			    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
 			    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
 				/* Try a step down */
 				/* oth packs vol_reduce, step_cnt and last_step_state, 16 bits apart. */
 				oth = net->cc_mod.rtcc.vol_reduce;
 				oth <<= 16;
 				oth |= net->cc_mod.rtcc.step_cnt;
 				oth <<= 16;
 				oth |= net->cc_mod.rtcc.last_step_state;
-				SDT_PROBE(sctp, cwnd, net, rttstep,
+				SDT_PROBE5(sctp, cwnd, net, rttstep,
 				    vtag,
 				    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 				    oth,
 				    probepoint);
 				/* Only give up an MTU while cwnd is above four MTUs. */
 				if (net->cwnd > (4 * net->mtu)) {
 					net->cwnd -= net->mtu;
 					net->cc_mod.rtcc.vol_reduce++;
 				} else {
 					net->cc_mod.rtcc.step_cnt = 0;
 				}
 			}
 		}
 		return (1);
 	}
 	if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
 		/*
 		 * rtt decreased, there could be more room. we update both
 		 * the bw and the rtt here to lock this in as a good step
 		 * down.
 		 */
 		/* Probe point 6 */
 		probepoint |= ((6 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    net->flight_size,
 		    probepoint);
 		if (net->cc_mod.rtcc.steady_step) {
 			oth = net->cc_mod.rtcc.vol_reduce;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.step_cnt;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.last_step_state;
-			SDT_PROBE(sctp, cwnd, net, rttstep,
+			SDT_PROBE5(sctp, cwnd, net, rttstep,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 			    oth,
 			    probepoint);
 			if ((net->cc_mod.rtcc.last_step_state == 5) &&
 			    (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) {
 				/* Step down worked */
 				net->cc_mod.rtcc.step_cnt = 0;
 				return (1);
 			} else {
 				net->cc_mod.rtcc.last_step_state = 6;
 				net->cc_mod.rtcc.step_cnt = 0;
 			}
 		}
 		/* Record the new bandwidth/RTT sample and the cwnd it was taken at. */
 		net->cc_mod.rtcc.lbw = nbw;
 		net->cc_mod.rtcc.lbw_rtt = net->rtt;
 		net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
 		if (inst_ind == SCTP_INST_GAINING)
 			return (1);
 		else if (inst_ind == SCTP_INST_NEUTRAL)
 			return (1);
 		else
 			return (0);
 	}
 	/*
 	 * Ok bw and rtt remained the same .. no update to any
 	 */
 	/* Probe point 7 */
 	probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 	    net->flight_size,
 	    probepoint);
 	if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
 		/* Same consecutive-visit counting as in the RTT-increased branch. */
 		if (net->cc_mod.rtcc.last_step_state == 5)
 			net->cc_mod.rtcc.step_cnt++;
 		else
 			net->cc_mod.rtcc.step_cnt = 1;
 		net->cc_mod.rtcc.last_step_state = 5;
 		if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
 		    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
 		    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
 			/* Try a step down */
 			if (net->cwnd > (4 * net->mtu)) {
 				net->cwnd -= net->mtu;
 				net->cc_mod.rtcc.vol_reduce++;
 				return (1);
 			} else {
 				net->cc_mod.rtcc.step_cnt = 0;
 			}
 		}
 	}
 	if (inst_ind == SCTP_INST_GAINING)
 		return (1);
 	else if (inst_ind == SCTP_INST_NEUTRAL)
 		return (1);
 	else
 		return ((int)net->cc_mod.rtcc.ret_from_eq);
 }
 
 static int
 cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset,
     uint64_t vtag, uint8_t inst_ind)
 {
 	uint64_t oth, probepoint;
 
 	/* Bandwidth decreased. */
 	probepoint = (((uint64_t) net->cwnd) << 32);
 	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
 		/* rtt increased */
 		/* Did we add more */
 		if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) &&
 		    (inst_ind != SCTP_INST_LOOSING)) {
 			/* We caused it maybe.. back off? */
 			/* PROBE POINT 1 */
 			probepoint |= ((1 << 16) | 1);
-			SDT_PROBE(sctp, cwnd, net, rttvar,
+			SDT_PROBE5(sctp, cwnd, net, rttvar,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 			    net->flight_size,
 			    probepoint);
 			if (net->cc_mod.rtcc.ret_from_eq) {
 				/*
 				 * Switch over to CA if we are less
 				 * aggressive
 				 */
 				net->ssthresh = net->cwnd - 1;
 				net->partial_bytes_acked = 0;
 			}
 			return (1);
 		}
 		/* Probe point 2 */
 		probepoint |= ((2 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    net->flight_size,
 		    probepoint);
 		/* Someone else - fight for more? */
 		if (net->cc_mod.rtcc.steady_step) {
 			oth = net->cc_mod.rtcc.vol_reduce;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.step_cnt;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.last_step_state;
-			SDT_PROBE(sctp, cwnd, net, rttstep,
+			SDT_PROBE5(sctp, cwnd, net, rttstep,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 			    oth,
 			    probepoint);
 			/*
 			 * Did we voluntarily give up some? if so take one
 			 * back please
 			 */
 			if ((net->cc_mod.rtcc.vol_reduce) &&
 			    (inst_ind != SCTP_INST_GAINING)) {
 				net->cwnd += net->mtu;
 				sctp_enforce_cwnd_limit(&stcb->asoc, net);
 				net->cc_mod.rtcc.vol_reduce--;
 			}
 			net->cc_mod.rtcc.last_step_state = 2;
 			net->cc_mod.rtcc.step_cnt = 0;
 		}
 		goto out_decision;
 	} else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
 		/* bw & rtt decreased */
 		/* Probe point 3 */
 		probepoint |= ((3 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    net->flight_size,
 		    probepoint);
 		if (net->cc_mod.rtcc.steady_step) {
 			oth = net->cc_mod.rtcc.vol_reduce;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.step_cnt;
 			oth <<= 16;
 			oth |= net->cc_mod.rtcc.last_step_state;
-			SDT_PROBE(sctp, cwnd, net, rttstep,
+			SDT_PROBE5(sctp, cwnd, net, rttstep,
 			    vtag,
 			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 			    oth,
 			    probepoint);
 			if ((net->cc_mod.rtcc.vol_reduce) &&
 			    (inst_ind != SCTP_INST_GAINING)) {
 				net->cwnd += net->mtu;
 				sctp_enforce_cwnd_limit(&stcb->asoc, net);
 				net->cc_mod.rtcc.vol_reduce--;
 			}
 			net->cc_mod.rtcc.last_step_state = 3;
 			net->cc_mod.rtcc.step_cnt = 0;
 		}
 		goto out_decision;
 	}
 	/* The bw decreased but rtt stayed the same */
 	/* Probe point 4 */
 	probepoint |= ((4 << 16) | 0);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 	    net->flight_size,
 	    probepoint);
 	if (net->cc_mod.rtcc.steady_step) {
 		oth = net->cc_mod.rtcc.vol_reduce;
 		oth <<= 16;
 		oth |= net->cc_mod.rtcc.step_cnt;
 		oth <<= 16;
 		oth |= net->cc_mod.rtcc.last_step_state;
-		SDT_PROBE(sctp, cwnd, net, rttstep,
+		SDT_PROBE5(sctp, cwnd, net, rttstep,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    oth,
 		    probepoint);
 		if ((net->cc_mod.rtcc.vol_reduce) &&
 		    (inst_ind != SCTP_INST_GAINING)) {
 			net->cwnd += net->mtu;
 			sctp_enforce_cwnd_limit(&stcb->asoc, net);
 			net->cc_mod.rtcc.vol_reduce--;
 		}
 		net->cc_mod.rtcc.last_step_state = 4;
 		net->cc_mod.rtcc.step_cnt = 0;
 	}
 out_decision:
 	net->cc_mod.rtcc.lbw = nbw;
 	net->cc_mod.rtcc.lbw_rtt = net->rtt;
 	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
 	if (inst_ind == SCTP_INST_GAINING) {
 		return (1);
 	} else {
 		return (0);
 	}
 }
 
 static int
 cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t vtag)
 {
 	uint64_t oth, probepoint;
 
 	/*
 	 * BW increased, so update and return 0, since all actions in our
 	 * table say to do the normal CC update. Note that we pay no
 	 * attention to the inst_ind since our overall sum is increasing.
 	 */
 	/* PROBE POINT 0 */
 	probepoint = (((uint64_t) net->cwnd) << 32);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 	    net->flight_size,
 	    probepoint);
 	if (net->cc_mod.rtcc.steady_step) {
 		oth = net->cc_mod.rtcc.vol_reduce;
 		oth <<= 16;
 		oth |= net->cc_mod.rtcc.step_cnt;
 		oth <<= 16;
 		oth |= net->cc_mod.rtcc.last_step_state;
-		SDT_PROBE(sctp, cwnd, net, rttstep,
+		SDT_PROBE5(sctp, cwnd, net, rttstep,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    oth,
 		    probepoint);
 		net->cc_mod.rtcc.last_step_state = 0;
 		net->cc_mod.rtcc.step_cnt = 0;
 		net->cc_mod.rtcc.vol_reduce = 0;
 	}
 	net->cc_mod.rtcc.lbw = nbw;
 	net->cc_mod.rtcc.lbw_rtt = net->rtt;
 	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
 	return (0);
 }
 
 /* RTCC Algoritm to limit growth of cwnd, return
  * true if you want to NOT allow cwnd growth
  */
 static int
 cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
 {
 	uint64_t bw_offset, rtt_offset;
 	uint64_t probepoint, rtt, vtag;
 	uint64_t bytes_for_this_rtt, inst_bw;
 	uint64_t div, inst_off;
 	int bw_shift;
 	uint8_t inst_ind;
 	int ret;
 
 	/*-
 	 * Here we need to see if we want
 	 * to limit cwnd growth due to increase
 	 * in overall rtt but no increase in bw.
 	 * We use the following table to figure
 	 * out what we should do. When we return
 	 * 0, cc update goes on as planned. If we
 	 * return 1, then no cc update happens and cwnd
 	 * stays where it is at.
 	 * ----------------------------------
 	 *   BW    |    RTT   | Action
 	 * *********************************
 	 *   INC   |    INC   | return 0
 	 * ----------------------------------
 	 *   INC   |    SAME  | return 0
 	 * ----------------------------------
 	 *   INC   |    DECR  | return 0
 	 * ----------------------------------
 	 *   SAME  |    INC   | return 1
 	 * ----------------------------------
 	 *   SAME  |    SAME  | return 1
 	 * ----------------------------------
 	 *   SAME  |    DECR  | return 0
 	 * ----------------------------------
 	 *   DECR  |    INC   | return 0 or 1 based on if we caused.
 	 * ----------------------------------
 	 *   DECR  |    SAME  | return 0
 	 * ----------------------------------
 	 *   DECR  |    DECR  | return 0
 	 * ----------------------------------
 	 *
 	 * We are a bit fuzz on what an increase or
 	 * decrease is. For BW it is the same if
 	 * it did not change within 1/64th. For
 	 * RTT it stayed the same if it did not
 	 * change within 1/32nd
 	 */
 	bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw);
 	rtt = stcb->asoc.my_vtag;
 	vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport);
 	probepoint = (((uint64_t) net->cwnd) << 32);
 	rtt = net->rtt;
 	if (net->cc_mod.rtcc.rtt_set_this_sack) {
 		net->cc_mod.rtcc.rtt_set_this_sack = 0;
 		bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc;
 		net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
 		if (net->rtt) {
 			div = net->rtt / 1000;
 			if (div) {
 				inst_bw = bytes_for_this_rtt / div;
 				inst_off = inst_bw >> bw_shift;
 				if (inst_bw > nbw)
 					inst_ind = SCTP_INST_GAINING;
 				else if ((inst_bw + inst_off) < nbw)
 					inst_ind = SCTP_INST_LOOSING;
 				else
 					inst_ind = SCTP_INST_NEUTRAL;
 				probepoint |= ((0xb << 16) | inst_ind);
 			} else {
 				inst_ind = net->cc_mod.rtcc.last_inst_ind;
 				inst_bw = bytes_for_this_rtt / (uint64_t) (net->rtt);
 				/* Can't determine do not change */
 				probepoint |= ((0xc << 16) | inst_ind);
 			}
 		} else {
 			inst_ind = net->cc_mod.rtcc.last_inst_ind;
 			inst_bw = bytes_for_this_rtt;
 			/* Can't determine do not change */
 			probepoint |= ((0xd << 16) | inst_ind);
 		}
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((nbw << 32) | inst_bw),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
 		    net->flight_size,
 		    probepoint);
 	} else {
 		/* No rtt measurement, use last one */
 		inst_ind = net->cc_mod.rtcc.last_inst_ind;
 	}
 	bw_offset = net->cc_mod.rtcc.lbw >> bw_shift;
 	if (nbw > net->cc_mod.rtcc.lbw + bw_offset) {
 		ret = cc_bw_increase(stcb, net, nbw, vtag);
 		goto out;
 	}
 	rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt);
 	if (nbw < net->cc_mod.rtcc.lbw - bw_offset) {
 		ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind);
 		goto out;
 	}
 	/*
 	 * If we reach here then we are in a situation where the bw stayed
 	 * the same.
 	 */
 	ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind);
 out:
 	net->cc_mod.rtcc.last_inst_ind = inst_ind;
 	return (ret);
 }
 
 static void
 sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all SCTP_UNUSED, int will_exit, int use_rtcc)
 {
 	struct sctp_nets *net;
 	int old_cwnd;
 	uint32_t t_ssthresh, t_cwnd, incr;
 	uint64_t t_ucwnd_sbw;
 	uint64_t t_path_mptcp;
 	uint64_t mptcp_like_alpha;
 	uint32_t srtt;
 	uint64_t max_path;
 
 	/* MT FIXME: Don't compute this over and over again */
 	t_ssthresh = 0;
 	t_cwnd = 0;
 	t_ucwnd_sbw = 0;
 	t_path_mptcp = 0;
 	mptcp_like_alpha = 1;
 	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
 	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2) ||
 	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_MPTCP)) {
 		max_path = 0;
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 			t_ssthresh += net->ssthresh;
 			t_cwnd += net->cwnd;
 			/* lastsa>>3;  we don't need to devide ... */
 			srtt = net->lastsa;
 			if (srtt > 0) {
 				uint64_t tmp;
 
 				t_ucwnd_sbw += (uint64_t) net->cwnd / (uint64_t) srtt;
 				t_path_mptcp += (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_Z) /
 				    (((uint64_t) net->mtu) * (uint64_t) srtt);
 				tmp = (((uint64_t) net->cwnd) << SHIFT_MPTCP_MULTI_N) /
 				    ((uint64_t) net->mtu * (uint64_t) (srtt * srtt));
 				if (tmp > max_path) {
 					max_path = tmp;
 				}
 			}
 		}
 		if (t_path_mptcp > 0) {
 			mptcp_like_alpha = max_path / (t_path_mptcp * t_path_mptcp);
 		} else {
 			mptcp_like_alpha = 1;
 		}
 	}
 	if (t_ssthresh == 0) {
 		t_ssthresh = 1;
 	}
 	if (t_ucwnd_sbw == 0) {
 		t_ucwnd_sbw = 1;
 	}
 	/******************************/
 	/* update cwnd and Early FR   */
 	/******************************/
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code. Need to debug.
 		 */
 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
 			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
 			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
 				net->will_exit_fast_recovery = 1;
 			}
 		}
 #endif
 		/* if nothing was acked on this destination skip it */
 		if (net->net_ack == 0) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
 			}
 			continue;
 		}
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code
 		 */
 		/*
 		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
 		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
 		 * } else if (sctp_cmt_on_off == 0 &&
 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
 		 */
 #endif
 
 		if (asoc->fast_retran_loss_recovery &&
 		    (will_exit == 0) &&
 		    (asoc->sctp_cmt_on_off == 0)) {
 			/*
 			 * If we are in loss recovery we skip any cwnd
 			 * update
 			 */
 			return;
 		}
 		/*
 		 * Did any measurements go on for this network?
 		 */
 		if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) {
 			uint64_t nbw;
 
 			/*
 			 * At this point our bw_bytes has been updated by
 			 * incoming sack information.
 			 * 
 			 * But our bw may not yet be set.
 			 * 
 			 */
 			if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) {
 				nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000);
 			} else {
 				nbw = net->cc_mod.rtcc.bw_bytes;
 			}
 			if (net->cc_mod.rtcc.lbw) {
 				if (cc_bw_limit(stcb, net, nbw)) {
 					/* Hold here, no update */
 					continue;
 				}
 			} else {
 				uint64_t vtag, probepoint;
 
 				probepoint = (((uint64_t) net->cwnd) << 32);
 				probepoint |= ((0xa << 16) | 0);
 				vtag = (net->rtt << 32) |
 				    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
 				    (stcb->rport);
 
-				SDT_PROBE(sctp, cwnd, net, rttvar,
+				SDT_PROBE5(sctp, cwnd, net, rttvar,
 				    vtag,
 				    nbw,
 				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 				    net->flight_size,
 				    probepoint);
 				net->cc_mod.rtcc.lbw = nbw;
 				net->cc_mod.rtcc.lbw_rtt = net->rtt;
 				if (net->cc_mod.rtcc.rtt_set_this_sack) {
 					net->cc_mod.rtcc.rtt_set_this_sack = 0;
 					net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
 				}
 			}
 		}
 		/*
 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
 		 * moved.
 		 */
 		if (accum_moved ||
 		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
 			/* If the cumulative ack moved we can proceed */
 			if (net->cwnd <= net->ssthresh) {
 				/* We are in slow start */
 				if (net->flight_size + net->net_ack >= net->cwnd) {
 					uint32_t limit;
 
 					old_cwnd = net->cwnd;
 					switch (asoc->sctp_cmt_on_off) {
 					case SCTP_CMT_RPV1:
 						limit = (uint32_t) (((uint64_t) net->mtu *
 						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
 						    (uint64_t) net->ssthresh) /
 						    (uint64_t) t_ssthresh);
 						incr = (uint32_t) (((uint64_t) net->net_ack *
 						    (uint64_t) net->ssthresh) /
 						    (uint64_t) t_ssthresh);
 						if (incr > limit) {
 							incr = limit;
 						}
 						if (incr == 0) {
 							incr = 1;
 						}
 						break;
 					case SCTP_CMT_RPV2:
 						/*
 						 * lastsa>>3;  we don't need
 						 * to divide ...
 						 */
 						srtt = net->lastsa;
 						if (srtt == 0) {
 							srtt = 1;
 						}
 						limit = (uint32_t) (((uint64_t) net->mtu *
 						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
 						    (uint64_t) net->cwnd) /
 						    ((uint64_t) srtt * t_ucwnd_sbw));
 						/* INCREASE FACTOR */
 						incr = (uint32_t) (((uint64_t) net->net_ack *
 						    (uint64_t) net->cwnd) /
 						    ((uint64_t) srtt * t_ucwnd_sbw));
 						/* INCREASE FACTOR */
 						if (incr > limit) {
 							incr = limit;
 						}
 						if (incr == 0) {
 							incr = 1;
 						}
 						break;
 					case SCTP_CMT_MPTCP:
 						limit = (uint32_t) (((uint64_t) net->mtu *
 						    mptcp_like_alpha *
 						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) >>
 						    SHIFT_MPTCP_MULTI);
 						incr = (uint32_t) (((uint64_t) net->net_ack *
 						    mptcp_like_alpha) >>
 						    SHIFT_MPTCP_MULTI);
 						if (incr > limit) {
 							incr = limit;
 						}
 						if (incr > net->net_ack) {
 							incr = net->net_ack;
 						}
 						if (incr > net->mtu) {
 							incr = net->mtu;
 						}
 						break;
 					default:
 						incr = net->net_ack;
 						if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
 							incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
 						}
 						break;
 					}
 					net->cwnd += incr;
 					sctp_enforce_cwnd_limit(asoc, net);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, incr,
 						    SCTP_CWND_LOG_FROM_SS);
 					}
-					SDT_PROBE(sctp, cwnd, net, ack,
+					SDT_PROBE5(sctp, cwnd, net, ack,
 					    stcb->asoc.my_vtag,
 					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 					    net,
 					    old_cwnd, net->cwnd);
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_SS);
 					}
 				}
 			} else {
 				/* We are in congestion avoidance */
 				/*
 				 * Add to pba
 				 */
 				net->partial_bytes_acked += net->net_ack;
 
 				if ((net->flight_size + net->net_ack >= net->cwnd) &&
 				    (net->partial_bytes_acked >= net->cwnd)) {
 					net->partial_bytes_acked -= net->cwnd;
 					old_cwnd = net->cwnd;
 					switch (asoc->sctp_cmt_on_off) {
 					case SCTP_CMT_RPV1:
 						incr = (uint32_t) (((uint64_t) net->mtu *
 						    (uint64_t) net->ssthresh) /
 						    (uint64_t) t_ssthresh);
 						if (incr == 0) {
 							incr = 1;
 						}
 						break;
 					case SCTP_CMT_RPV2:
 						/*
 						 * lastsa>>3;  we don't need
 						 * to divide ...
 						 */
 						srtt = net->lastsa;
 						if (srtt == 0) {
 							srtt = 1;
 						}
 						incr = (uint32_t) ((uint64_t) net->mtu *
 						    (uint64_t) net->cwnd /
 						    ((uint64_t) srtt *
 						    t_ucwnd_sbw));
 						/* INCREASE FACTOR */
 						if (incr == 0) {
 							incr = 1;
 						}
 						break;
 					case SCTP_CMT_MPTCP:
 						incr = (uint32_t) ((mptcp_like_alpha *
 						    (uint64_t) net->cwnd) >>
 						    SHIFT_MPTCP_MULTI);
 						if (incr > net->mtu) {
 							incr = net->mtu;
 						}
 						break;
 					default:
 						incr = net->mtu;
 						break;
 					}
 					net->cwnd += incr;
 					sctp_enforce_cwnd_limit(asoc, net);
-					SDT_PROBE(sctp, cwnd, net, ack,
+					SDT_PROBE5(sctp, cwnd, net, ack,
 					    stcb->asoc.my_vtag,
 					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 					    net,
 					    old_cwnd, net->cwnd);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->mtu,
 						    SCTP_CWND_LOG_FROM_CA);
 					}
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_CA);
 					}
 				}
 			}
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_NO_CUMACK);
 			}
 		}
 	}
 }
 
 static void
 sctp_cwnd_update_exit_pf_common(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int old_cwnd;
 
 	old_cwnd = net->cwnd;
 	net->cwnd = net->mtu;
-	SDT_PROBE(sctp, cwnd, net, ack,
+	SDT_PROBE5(sctp, cwnd, net, ack,
 	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
 	    old_cwnd, net->cwnd);
 	SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
 	    (void *)net, net->cwnd);
 }
 
 
 static void
 sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int old_cwnd = net->cwnd;
 	uint32_t t_ssthresh, t_cwnd;
 	uint64_t t_ucwnd_sbw;
 
 	/* MT FIXME: Don't compute this over and over again */
 	t_ssthresh = 0;
 	t_cwnd = 0;
 	if ((stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) ||
 	    (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV2)) {
 		struct sctp_nets *lnet;
 		uint32_t srtt;
 
 		t_ucwnd_sbw = 0;
 		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
 			t_ssthresh += lnet->ssthresh;
 			t_cwnd += lnet->cwnd;
 			srtt = lnet->lastsa;
 			/* lastsa>>3;  we don't need to divide ... */
 			if (srtt > 0) {
 				t_ucwnd_sbw += (uint64_t) lnet->cwnd / (uint64_t) srtt;
 			}
 		}
 		if (t_ssthresh < 1) {
 			t_ssthresh = 1;
 		}
 		if (t_ucwnd_sbw < 1) {
 			t_ucwnd_sbw = 1;
 		}
 		if (stcb->asoc.sctp_cmt_on_off == SCTP_CMT_RPV1) {
 			net->ssthresh = (uint32_t) (((uint64_t) 4 *
 			    (uint64_t) net->mtu *
 			    (uint64_t) net->ssthresh) /
 			    (uint64_t) t_ssthresh);
 		} else {
 			uint64_t cc_delta;
 
 			srtt = net->lastsa;
 			/* lastsa>>3;  we don't need to divide ... */
 			if (srtt == 0) {
 				srtt = 1;
 			}
 			cc_delta = t_ucwnd_sbw * (uint64_t) srtt / 2;
 			if (cc_delta < t_cwnd) {
 				net->ssthresh = (uint32_t) ((uint64_t) t_cwnd - cc_delta);
 			} else {
 				net->ssthresh = net->mtu;
 			}
 		}
 		if ((net->cwnd > t_cwnd / 2) &&
 		    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
 			net->ssthresh = net->cwnd - t_cwnd / 2;
 		}
 		if (net->ssthresh < net->mtu) {
 			net->ssthresh = net->mtu;
 		}
 	} else {
 		net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
 	}
 	net->cwnd = net->mtu;
 	net->partial_bytes_acked = 0;
-	SDT_PROBE(sctp, cwnd, net, to,
+	SDT_PROBE5(sctp, cwnd, net, to,
 	    stcb->asoc.my_vtag,
 	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 	    net,
 	    old_cwnd, net->cwnd);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
 	}
 }
 
 static void
 sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net,
     int in_window, int num_pkt_lost, int use_rtcc)
 {
 	int old_cwnd = net->cwnd;
 
 	if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) {
 		/* Data center Congestion Control */
 		if (in_window == 0) {
 			/*
 			 * Go to CA with the cwnd at the point we sent the
 			 * TSN that was marked with a CE.
 			 */
 			if (net->ecn_prev_cwnd < net->cwnd) {
 				/* Restore to prev cwnd */
 				net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost);
 			} else {
 				/* Just cut in 1/2 */
 				net->cwnd /= 2;
 			}
 			/* Drop to CA */
 			net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
 			}
 		} else {
 			/*
 			 * Further tuning down required over the drastic
 			 * orginal cut
 			 */
 			net->ssthresh -= (net->mtu * num_pkt_lost);
 			net->cwnd -= (net->mtu * num_pkt_lost);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
 			}
 		}
 		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
 	} else {
 		if (in_window == 0) {
 			SCTP_STAT_INCR(sctps_ecnereducedcwnd);
 			net->ssthresh = net->cwnd / 2;
 			if (net->ssthresh < net->mtu) {
 				net->ssthresh = net->mtu;
 				/*
 				 * here back off the timer as well, to slow
 				 * us down
 				 */
 				net->RTO <<= 1;
 			}
 			net->cwnd = net->ssthresh;
-			SDT_PROBE(sctp, cwnd, net, ecn,
+			SDT_PROBE5(sctp, cwnd, net, ecn,
 			    stcb->asoc.my_vtag,
 			    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 			    net,
 			    old_cwnd, net->cwnd);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
 			}
 		}
 	}
 
 }
 
 static void
 sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
     struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
     uint32_t * bottle_bw, uint32_t * on_queue)
 {
 	uint32_t bw_avail;
 	unsigned int incr;
 	int old_cwnd = net->cwnd;
 
 	/* get bottle neck bw */
 	*bottle_bw = ntohl(cp->bottle_bw);
 	/* and whats on queue */
 	*on_queue = ntohl(cp->current_onq);
 	/*
 	 * adjust the on-queue if our flight is more it could be that the
 	 * router has not yet gotten data "in-flight" to it
 	 */
 	if (*on_queue < net->flight_size) {
 		*on_queue = net->flight_size;
 	}
 	/* rtt is measured in micro seconds, bottle_bw in bytes per second */
 	bw_avail = (uint32_t) (((uint64_t) (*bottle_bw) * net->rtt) / (uint64_t) 1000000);
 	if (bw_avail > *bottle_bw) {
 		/*
 		 * Cap the growth to no more than the bottle neck. This can
 		 * happen as RTT slides up due to queues. It also means if
 		 * you have more than a 1 second RTT with a empty queue you
 		 * will be limited to the bottle_bw per second no matter if
 		 * other points have 1/2 the RTT and you could get more
 		 * out...
 		 */
 		bw_avail = *bottle_bw;
 	}
 	if (*on_queue > bw_avail) {
 		/*
 		 * No room for anything else don't allow anything else to be
 		 * "added to the fire".
 		 */
 		int seg_inflight, seg_onqueue, my_portion;
 
 		net->partial_bytes_acked = 0;
 		/* how much are we over queue size? */
 		incr = *on_queue - bw_avail;
 		if (stcb->asoc.seen_a_sack_this_pkt) {
 			/*
 			 * undo any cwnd adjustment that the sack might have
 			 * made
 			 */
 			net->cwnd = net->prev_cwnd;
 		}
 		/* Now how much of that is mine? */
 		seg_inflight = net->flight_size / net->mtu;
 		seg_onqueue = *on_queue / net->mtu;
 		my_portion = (incr * seg_inflight) / seg_onqueue;
 
 		/* Have I made an adjustment already */
 		if (net->cwnd > net->flight_size) {
 			/*
 			 * for this flight I made an adjustment we need to
 			 * decrease the portion by a share our previous
 			 * adjustment.
 			 */
 			int diff_adj;
 
 			diff_adj = net->cwnd - net->flight_size;
 			if (diff_adj > my_portion)
 				my_portion = 0;
 			else
 				my_portion -= diff_adj;
 		}
 		/*
 		 * back down to the previous cwnd (assume we have had a sack
 		 * before this packet). minus what ever portion of the
 		 * overage is my fault.
 		 */
 		net->cwnd -= my_portion;
 
 		/* we will NOT back down more than 1 MTU */
 		if (net->cwnd <= net->mtu) {
 			net->cwnd = net->mtu;
 		}
 		/* force into CA */
 		net->ssthresh = net->cwnd - 1;
 	} else {
 		/*
 		 * Take 1/4 of the space left or max burst up .. whichever
 		 * is less.
 		 */
 		incr = (bw_avail - *on_queue) >> 2;
 		if ((stcb->asoc.max_burst > 0) &&
 		    (stcb->asoc.max_burst * net->mtu < incr)) {
 			incr = stcb->asoc.max_burst * net->mtu;
 		}
 		net->cwnd += incr;
 	}
 	if (net->cwnd > bw_avail) {
 		/* We can't exceed the pipe size */
 		net->cwnd = bw_avail;
 	}
 	if (net->cwnd < net->mtu) {
 		/* We always have 1 MTU */
 		net->cwnd = net->mtu;
 	}
 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	if (net->cwnd - old_cwnd != 0) {
 		/* log only changes */
-		SDT_PROBE(sctp, cwnd, net, pd,
+		SDT_PROBE5(sctp, cwnd, net, pd,
 		    stcb->asoc.my_vtag,
 		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 		    net,
 		    old_cwnd, net->cwnd);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
 			    SCTP_CWND_LOG_FROM_SAT);
 		}
 	}
 }
 
 static void
 sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
     struct sctp_nets *net, int burst_limit)
 {
 	int old_cwnd = net->cwnd;
 
 	if (net->ssthresh < net->cwnd)
 		net->ssthresh = net->cwnd;
 	if (burst_limit) {
 		net->cwnd = (net->flight_size + (burst_limit * net->mtu));
 		sctp_enforce_cwnd_limit(&stcb->asoc, net);
-		SDT_PROBE(sctp, cwnd, net, bl,
+		SDT_PROBE5(sctp, cwnd, net, bl,
 		    stcb->asoc.my_vtag,
 		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
 		    net,
 		    old_cwnd, net->cwnd);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
 		}
 	}
 }
 
 static void
 sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all, int will_exit)
 {
 	/* Passing a zero argument in last disables the rtcc algoritm */
 	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0);
 }
 
 static void
 sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
     int in_window, int num_pkt_lost)
 {
 	/* Passing a zero argument in last disables the rtcc algoritm */
 	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0);
 }
 
 /* Here starts the RTCCVAR type CC invented by RRS which
  * is a slight mod to RFC2581. We reuse a common routine or
  * two since these algoritms are so close and need to
  * remain the same.
  */
 static void
 sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
     int in_window, int num_pkt_lost)
 {
 	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1);
 }
 
 
 static
 void 
 sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
     struct sctp_tmit_chunk *tp1)
 {
 	net->cc_mod.rtcc.bw_bytes += tp1->send_size;
 }
 
 static void
 sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb SCTP_UNUSED,
     struct sctp_nets *net)
 {
 	if (net->cc_mod.rtcc.tls_needs_set > 0) {
 		/* We had a bw measurment going on */
 		struct timeval ltls;
 
 		SCTP_GETPTIME_TIMEVAL(&ltls);
 		timevalsub(&ltls, &net->cc_mod.rtcc.tls);
 		net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec;
 	}
 }
 
 static void
 sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	uint64_t vtag, probepoint;
 
 	if (net->cc_mod.rtcc.lbw) {
 		/* Clear the old bw.. we went to 0 in-flight */
 		vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
 		    (stcb->rport);
 		probepoint = (((uint64_t) net->cwnd) << 32);
 		/* Probe point 8 */
 		probepoint |= ((8 << 16) | 0);
-		SDT_PROBE(sctp, cwnd, net, rttvar,
+		SDT_PROBE5(sctp, cwnd, net, rttvar,
 		    vtag,
 		    ((net->cc_mod.rtcc.lbw << 32) | 0),
 		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
 		    net->flight_size,
 		    probepoint);
 		net->cc_mod.rtcc.lbw_rtt = 0;
 		net->cc_mod.rtcc.cwnd_at_bw_set = 0;
 		net->cc_mod.rtcc.lbw = 0;
 		net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
 		net->cc_mod.rtcc.vol_reduce = 0;
 		net->cc_mod.rtcc.bw_tot_time = 0;
 		net->cc_mod.rtcc.bw_bytes = 0;
 		net->cc_mod.rtcc.tls_needs_set = 0;
 		if (net->cc_mod.rtcc.steady_step) {
 			net->cc_mod.rtcc.vol_reduce = 0;
 			net->cc_mod.rtcc.step_cnt = 0;
 			net->cc_mod.rtcc.last_step_state = 0;
 		}
 		if (net->cc_mod.rtcc.ret_from_eq) {
 			/* less aggressive one - reset cwnd too */
 			uint32_t cwnd_in_mtu, cwnd;
 
 			cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
 			if (cwnd_in_mtu == 0) {
 				/*
 				 * Using 0 means that the value of RFC 4960
 				 * is used.
 				 */
 				cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
 			} else {
 				/*
 				 * We take the minimum of the burst limit
 				 * and the initial congestion window.
 				 */
 				if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst))
 					cwnd_in_mtu = stcb->asoc.max_burst;
 				cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
 			}
 			if (net->cwnd > cwnd) {
 				/*
 				 * Only set if we are not a timeout (i.e.
 				 * down to 1 mtu)
 				 */
 				net->cwnd = cwnd;
 			}
 		}
 	}
 }
 
 static void
 sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	uint64_t vtag, probepoint;
 
 	sctp_set_initial_cc_param(stcb, net);
 	stcb->asoc.use_precise_time = 1;
 	probepoint = (((uint64_t) net->cwnd) << 32);
 	probepoint |= ((9 << 16) | 0);
 	vtag = (net->rtt << 32) |
 	    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
 	    (stcb->rport);
-	SDT_PROBE(sctp, cwnd, net, rttvar,
+	SDT_PROBE5(sctp, cwnd, net, rttvar,
 	    vtag,
 	    0,
 	    0,
 	    0,
 	    probepoint);
 	net->cc_mod.rtcc.lbw_rtt = 0;
 	net->cc_mod.rtcc.cwnd_at_bw_set = 0;
 	net->cc_mod.rtcc.vol_reduce = 0;
 	net->cc_mod.rtcc.lbw = 0;
 	net->cc_mod.rtcc.vol_reduce = 0;
 	net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
 	net->cc_mod.rtcc.bw_tot_time = 0;
 	net->cc_mod.rtcc.bw_bytes = 0;
 	net->cc_mod.rtcc.tls_needs_set = 0;
 	net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret);
 	net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step);
 	net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn);
 	net->cc_mod.rtcc.step_cnt = 0;
 	net->cc_mod.rtcc.last_step_state = 0;
 
 
 }
 
 /*
  * Get or set one of the RTCC socket options.
  *
  * setorget == 1 is a set, anything else is a get.  The recognised options
  * are SCTP_CC_OPT_RTCC_SETMODE (ret_from_eq), SCTP_CC_OPT_USE_DCCC_ECN
  * (use_dccc_ecn) and SCTP_CC_OPT_STEADY_STEP (steady_step).
  *
  * A set writes the value to every destination of the association; the first
  * two options only accept 0 or 1.  A get reports the value held by the first
  * destination.
  *
  * Returns 0 on success, EINVAL for an unknown option or a rejected value,
  * and EFAULT when a get finds no destination.
  */
 static int
 sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget,
     struct sctp_cc_option *cc_opt)
 {
 	struct sctp_nets *net;
 
 	if (setorget != 1) {
 		/* a get: reject unknown options before looking at the nets */
 		if ((cc_opt->option != SCTP_CC_OPT_RTCC_SETMODE) &&
 		    (cc_opt->option != SCTP_CC_OPT_USE_DCCC_ECN) &&
 		    (cc_opt->option != SCTP_CC_OPT_STEADY_STEP)) {
 			return (EINVAL);
 		}
 		net = TAILQ_FIRST(&stcb->asoc.nets);
 		if (net == NULL) {
 			return (EFAULT);
 		}
 		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
 			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq;
 		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
 			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn;
 		} else {
 			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step;
 		}
 		return (0);
 	}
 	/* a set */
 	if ((cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) ||
 	    (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN)) {
 		/* both of these are on/off switches */
 		if ((cc_opt->aid_value.assoc_value != 0) &&
 		    (cc_opt->aid_value.assoc_value != 1)) {
 			return (EINVAL);
 		}
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 			if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
 				net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value;
 			} else {
 				net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value;
 			}
 		}
 		return (0);
 	}
 	if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 			net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value;
 		}
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Packet-transmitted hook for RTCC: while tls_needs_set is 0, stamp rtcc.tls
  * with the current time and move tls_needs_set to 2.  Any other state is
  * left untouched.
  */
 static void
 sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb SCTP_UNUSED,
     struct sctp_nets *net)
 {
 	if (net->cc_mod.rtcc.tls_needs_set != 0) {
 		return;
 	}
 	SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls);
 	net->cc_mod.rtcc.tls_needs_set = 2;
 }
 
 /*
  * SACK hook for RTCC: forwards to the common SACK handler with its final
  * argument set to 1, which enables the rtcc algorithm there.
  */
 static void
 sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all, int will_exit)
 {
 	const int use_rtcc = 1;
 
 	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved,
 	    reneged_all, will_exit, use_rtcc);
 }
 
 /*
  * RTT-calculated hook for RTCC: flags on the destination that an RTT
  * sample was taken (rtt_set_this_sack = 1).  stcb and now are unused.
  */
 static void
 sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb SCTP_UNUSED,
     struct sctp_nets *net,
     struct timeval *now SCTP_UNUSED)
 {
 	net->cc_mod.rtcc.rtt_set_this_sack = 1;
 }
 
 /* Here starts Sally Floyd's HS-TCP */
 
 /* One row of the HS-TCP adjustment table. */
 struct sctp_hs_raise_drop {
 	int32_t cwnd;		/* threshold compared against net->cwnd >> 10 */
 	int32_t increase;	/* growth step; shifted left by 10 before being added to cwnd */
 	int32_t drop_percent;	/* percentage taken off cwnd on a decrease */
 };
 
 /* Number of rows in sctp_cwnd_adjust[]. */
 #define SCTP_HS_TABLE_SIZE 73
 
 /*
  * HS-TCP adjustment table: { cwnd threshold, increase, drop percent }.
  * Rows are ordered by ascending cwnd threshold; the trailing comment on each
  * row is its index.  A window below row 0's threshold is handled in "normal
  * mode" by the increase/decrease routines.
  */
 struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
 	{38, 1, 50},		/* 0   */
 	{118, 2, 44},		/* 1   */
 	{221, 3, 41},		/* 2   */
 	{347, 4, 38},		/* 3   */
 	{495, 5, 37},		/* 4   */
 	{663, 6, 35},		/* 5   */
 	{851, 7, 34},		/* 6   */
 	{1058, 8, 33},		/* 7   */
 	{1284, 9, 32},		/* 8   */
 	{1529, 10, 31},		/* 9   */
 	{1793, 11, 30},		/* 10  */
 	{2076, 12, 29},		/* 11  */
 	{2378, 13, 28},		/* 12  */
 	{2699, 14, 28},		/* 13  */
 	{3039, 15, 27},		/* 14  */
 	{3399, 16, 27},		/* 15  */
 	{3778, 17, 26},		/* 16  */
 	{4177, 18, 26},		/* 17  */
 	{4596, 19, 25},		/* 18  */
 	{5036, 20, 25},		/* 19  */
 	{5497, 21, 24},		/* 20  */
 	{5979, 22, 24},		/* 21  */
 	{6483, 23, 23},		/* 22  */
 	{7009, 24, 23},		/* 23  */
 	{7558, 25, 22},		/* 24  */
 	{8130, 26, 22},		/* 25  */
 	{8726, 27, 22},		/* 26  */
 	{9346, 28, 21},		/* 27  */
 	{9991, 29, 21},		/* 28  */
 	{10661, 30, 21},	/* 29  */
 	{11358, 31, 20},	/* 30  */
 	{12082, 32, 20},	/* 31  */
 	{12834, 33, 20},	/* 32  */
 	{13614, 34, 19},	/* 33  */
 	{14424, 35, 19},	/* 34  */
 	{15265, 36, 19},	/* 35  */
 	{16137, 37, 19},	/* 36  */
 	{17042, 38, 18},	/* 37  */
 	{17981, 39, 18},	/* 38  */
 	{18955, 40, 18},	/* 39  */
 	{19965, 41, 17},	/* 40  */
 	{21013, 42, 17},	/* 41  */
 	{22101, 43, 17},	/* 42  */
 	{23230, 44, 17},	/* 43  */
 	{24402, 45, 16},	/* 44  */
 	{25618, 46, 16},	/* 45  */
 	{26881, 47, 16},	/* 46  */
 	{28193, 48, 16},	/* 47  */
 	{29557, 49, 15},	/* 48  */
 	{30975, 50, 15},	/* 49  */
 	{32450, 51, 15},	/* 50  */
 	{33986, 52, 15},	/* 51  */
 	{35586, 53, 14},	/* 52  */
 	{37253, 54, 14},	/* 53  */
 	{38992, 55, 14},	/* 54  */
 	{40808, 56, 14},	/* 55  */
 	{42707, 57, 13},	/* 56  */
 	{44694, 58, 13},	/* 57  */
 	{46776, 59, 13},	/* 58  */
 	{48961, 60, 13},	/* 59  */
 	{51258, 61, 13},	/* 60  */
 	{53677, 62, 12},	/* 61  */
 	{56230, 63, 12},	/* 62  */
 	{58932, 64, 12},	/* 63  */
 	{61799, 65, 12},	/* 64  */
 	{64851, 66, 11},	/* 65  */
 	{68113, 67, 11},	/* 66  */
 	{71617, 68, 11},	/* 67  */
 	{75401, 69, 10},	/* 68  */
 	{79517, 70, 10},	/* 69  */
 	{84035, 71, 10},	/* 70  */
 	{89053, 72, 10},	/* 71  */
 	{94717, 73, 9}		/* 72  */
 };
 
 /*
  * HS-TCP window increase for one destination.
  *
  * While cwnd (scaled down by 10 bits) is below the first table row the
  * window grows by min(net_ack, mtu).  Otherwise the table is scanned upwards
  * from last_hs_used for the first row whose threshold exceeds the scaled
  * cwnd (the last row if none does), that row is remembered in last_hs_used,
  * and its increase (shifted left by 10) is added.  The result is clamped by
  * sctp_enforce_cwnd_limit() and optionally logged.
  */
 static void
 sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	const int cwnd_before = net->cwnd;
 	const int scaled = net->cwnd >> 10;
 	int slot;
 
 	if (scaled >= sctp_cwnd_adjust[0].cwnd) {
 		for (slot = net->last_hs_used; slot < SCTP_HS_TABLE_SIZE; slot++) {
 			if (scaled < sctp_cwnd_adjust[slot].cwnd) {
 				break;
 			}
 		}
 		if (slot >= SCTP_HS_TABLE_SIZE) {
 			/* ran off the end: stay on the top row */
 			slot = SCTP_HS_TABLE_SIZE - 1;
 		}
 		net->last_hs_used = slot;
 		net->cwnd += ((sctp_cwnd_adjust[slot].increase) << 10);
 	} else {
 		/* normal mode */
 		net->cwnd += (net->net_ack > net->mtu) ? net->mtu : net->net_ack;
 	}
 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - cwnd_before), SCTP_CWND_LOG_FROM_SS);
 	}
 }
 
 /*
  * HS-TCP window decrease for one destination.
  *
  * While cwnd (scaled down by 10 bits) is below the first table row, ssthresh
  * becomes half of cwnd (but at least two MTUs) and cwnd follows it.
  * Otherwise cwnd is cut by the drop_percent of the row in last_hs_used, and
  * last_hs_used is then moved down to the row matching the reduced window so
  * that the next increase starts its scan from the right place.  The result
  * is clamped by sctp_enforce_cwnd_limit() and optionally logged.
  */
 static void
 sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int cur_val, i;
 	int old_cwnd = net->cwnd;
 
 	cur_val = net->cwnd >> 10;
 	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
 		/* normal mode */
 		net->ssthresh = net->cwnd / 2;
 		if (net->ssthresh < (net->mtu * 2)) {
 			net->ssthresh = 2 * net->mtu;
 		}
 		net->cwnd = net->ssthresh;
 	} else {
 		/* drop by the proper amount */
 		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
 		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
 		net->cwnd = net->ssthresh;
 		/* now where are we */
 		cur_val = net->cwnd >> 10;
 		/* reset where we are in the table */
 		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
 			/* fell out of hs */
 			net->last_hs_used = 0;
 		} else {
 			/*
 			 * Walk down from the old row until the row below no
 			 * longer covers the reduced window.
 			 */
 			for (i = net->last_hs_used; i >= 1; i--) {
 				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
 					break;
 				}
 			}
 			/*
 			 * Store the row the scan found.  Storing the
 			 * pre-scan index here would discard the result and
 			 * leave last_hs_used pointing at a row that is too
 			 * high for the reduced window.
 			 */
 			net->last_hs_used = i;
 		}
 	}
 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
 	}
 }
 
 /*
  * Fast-retransmit hook for the HS-TCP module.
  *
  * For every destination with net_ack > 0: if the association is not already
  * in fast recovery (or CMT is on), shrink the window through
  * sctp_hs_cwnd_decrease(), open a fast recovery window on both the
  * association and the destination, and restart the send timer.  Otherwise
  * only the sctps_fastretransinrtt statistic is bumped.
  */
 static void
 sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_nets *net;
 
 	/*
 	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
 	 * (net->fast_retran_loss_recovery == 0)))
 	 */
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		struct sctp_tmit_chunk *head;
 
 		if (net->net_ack <= 0) {
 			/* nothing to do for this destination */
 			continue;
 		}
 		if ((asoc->fast_retran_loss_recovery != 0) &&
 		    (asoc->sctp_cmt_on_off <= 0)) {
 			/*
 			 * Mark a peg that we WOULD have done a cwnd
 			 * reduction but RFC2582 prevented this action.
 			 */
 			SCTP_STAT_INCR(sctps_fastretransinrtt);
 			continue;
 		}
 		/*
 		 * Per section 7.2.3: this destination had a fast
 		 * retransmit, so ssthresh and cwnd are adjusted.
 		 */
 		sctp_hs_cwnd_decrease(stcb, net);
 
 		head = TAILQ_FIRST(&asoc->send_queue);
 
 		net->partial_bytes_acked = 0;
 		/* Turn on the fast recovery window, association-wide ... */
 		asoc->fast_retran_loss_recovery = 1;
 		/* ... and per destination (CMT fast recovery). */
 		net->fast_retran_loss_recovery = 1;
 
 		/* Mark the end of the window. */
 		if (head != NULL) {
 			asoc->fast_recovery_tsn = head->rec.data.TSN_seq - 1;
 			net->fast_recovery_tsn = head->rec.data.TSN_seq - 1;
 		} else {
 			asoc->fast_recovery_tsn = asoc->sending_seq - 1;
 			net->fast_recovery_tsn = asoc->sending_seq - 1;
 		}
 
 		sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net,
 		    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_2);
 		sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net);
 	}
 }
 
 /*
  * SACK hook for the HS-TCP module.
  *
  * Visits every destination of the association.  Destinations with
  * net_ack == 0 are only logged.  For the others, when the cumulative ack
  * moved (or CMT is on and the pseudo-cumack moved): in slow start a
  * cwnd-limited destination grows through sctp_hs_cwnd_increase(); in
  * congestion avoidance cwnd grows by one MTU once partial_bytes_acked has
  * reached cwnd.  reneged_all is unused.
  */
 static void
 sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
 {
 	struct sctp_nets *net;
 
 	/******************************/
 	/* update cwnd and Early FR   */
 	/******************************/
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code. Need to debug.
 		 */
 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
 			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
 			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
 				net->will_exit_fast_recovery = 1;
 			}
 		}
 #endif
 		/* if nothing was acked on this destination skip it */
 		if (net->net_ack == 0) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
 			}
 			continue;
 		}
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code
 		 */
 		/*
 		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
 		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
 		 * } else if (sctp_cmt_on_off == 0 &&
 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
 		 */
 #endif
 
 		if (asoc->fast_retran_loss_recovery &&
 		    (will_exit == 0) &&
 		    (asoc->sctp_cmt_on_off == 0)) {
 			/*
 			 * If we are in loss recovery we skip any cwnd
 			 * update.  The condition only reads association
 			 * state, so this return ends the walk for all
 			 * remaining destinations as well.
 			 */
 			return;
 		}
 		/*
 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
 		 * moved.
 		 */
 		if (accum_moved ||
 		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
 			/* If the cumulative ack moved we can proceed */
 			if (net->cwnd <= net->ssthresh) {
 				/* We are in slow start */
 				if (net->flight_size + net->net_ack >= net->cwnd) {
 					sctp_hs_cwnd_increase(stcb, net);
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_SS);
 					}
 				}
 			} else {
 				/* We are in congestion avoidance */
 				net->partial_bytes_acked += net->net_ack;
 				if ((net->flight_size + net->net_ack >= net->cwnd) &&
 				    (net->partial_bytes_acked >= net->cwnd)) {
 					net->partial_bytes_acked -= net->cwnd;
 					net->cwnd += net->mtu;
 					sctp_enforce_cwnd_limit(asoc, net);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->mtu,
 						    SCTP_CWND_LOG_FROM_CA);
 					}
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 						sctp_log_cwnd(stcb, net, net->net_ack,
 						    SCTP_CWND_LOG_NOADV_CA);
 					}
 				}
 			}
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_NO_CUMACK);
 			}
 		}
 	}
 }
 
 
 /*
  * H-TCP congestion control. The algorithm is detailed in:
  * R.N.Shorten, D.J.Leith:
  *   "H-TCP: TCP for high-speed and long-distance networks"
  *   Proc. PFLDnet, Argonne, 2004.
  * http://www.hamilton.ie/net/htcp3.pdf
  */
 
 
 /* When non-zero, htcp_alpha_update() scales its factor by hz / minRTT. */
 static int use_rtt_scaling = 1;
 /*
  * When non-zero, achieved throughput is tracked in
  * measure_achieved_throughput() and consulted by htcp_beta_update().
  */
 static int use_bandwidth_switch = 1;
 
 /*
  * Modular range test: non-zero when seq1 lies in the window that starts at
  * seq2 and ends at seq3 (both inclusive), with all arithmetic wrapping at
  * 32 bits.
  */
 static inline int
 between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
 {
 	const uint32_t window = seq3 - seq2;
 	const uint32_t offset = seq1 - seq2;
 
 	return (offset <= window);
 }
 
 /* Ticks elapsed since the last recorded congestion event (ca->last_cong). */
 static inline uint32_t
 htcp_cong_time(struct htcp *ca)
 {
 	uint32_t now = sctp_get_tick_count();
 
 	return (now - ca->last_cong);
 }
 
 /*
  * Time since the last congestion event, expressed in units of minRTT.
  *
  * minRTT is zero until the first sample is taken, and measure_rtt() can
  * also store a zero sample, so the division is guarded: with no usable
  * minRTT the raw tick count is returned instead of dividing by zero.
  */
 static inline uint32_t
 htcp_ccount(struct htcp *ca)
 {
 	if (ca->minRTT == 0) {
 		return (htcp_cong_time(ca));
 	}
 	return (htcp_cong_time(ca) / ca->minRTT);
 }
 
 /*
  * Record a congestion event: save maxRTT, old_maxB and last_cong into their
  * undo_* shadows, then restart the congestion clock at the current tick.
  */
 static inline void
 htcp_reset(struct htcp *ca)
 {
 	/* snapshot first ... */
 	ca->undo_old_maxB = ca->old_maxB;
 	ca->undo_maxRTT = ca->maxRTT;
 	ca->undo_last_cong = ca->last_cong;
 	/* ... then overwrite the live timestamp */
 	ca->last_cong = sctp_get_tick_count();
 }
 
 #ifdef SCTP_NOT_USED
 
 static uint32_t
 htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong;
 	net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT;
 	net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB;
 	return (max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu));
 }
 
 #endif
 
 static inline void
 measure_rtt(struct sctp_nets *net)
 {
 	uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT;
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
 	if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT)
 		net->cc_mod.htcp_ca.minRTT = srtt;
 
 	/* max RTT */
 	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) {
 		if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT)
 			net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT;
 		if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + MSEC_TO_TICKS(20))
 			net->cc_mod.htcp_ca.maxRTT = srtt;
 	}
 }
 
 static void
 measure_achieved_throughput(struct sctp_nets *net)
 {
 	uint32_t now = sctp_get_tick_count();
 
 	if (net->fast_retran_ip == 0)
 		net->cc_mod.htcp_ca.bytes_acked = net->net_ack;
 
 	if (!use_bandwidth_switch)
 		return;
 
 	/* achieved throughput calculations */
 	/* JRS - not 100% sure of this statement */
 	if (net->fast_retran_ip == 1) {
 		net->cc_mod.htcp_ca.bytecount = 0;
 		net->cc_mod.htcp_ca.lasttime = now;
 		return;
 	}
 	net->cc_mod.htcp_ca.bytecount += net->net_ack;
 	if ((net->cc_mod.htcp_ca.bytecount >= net->cwnd - (((net->cc_mod.htcp_ca.alpha >> 7) ? (net->cc_mod.htcp_ca.alpha >> 7) : 1) * net->mtu)) &&
 	    (now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT) &&
 	    (net->cc_mod.htcp_ca.minRTT > 0)) {
 		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);
 
 		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
 			/* just after backoff */
 			net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi;
 		} else {
 			net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4;
 			if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB)
 				net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi;
 			if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB)
 				net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB;
 		}
 		net->cc_mod.htcp_ca.bytecount = 0;
 		net->cc_mod.htcp_ca.lasttime = now;
 	}
 }
 
 /*
  * Recompute the H-TCP backoff factor beta.
  *
  * With use_bandwidth_switch on, old_maxB is refreshed from maxB and, unless
  * 5 * maxB lies between 4 and 6 times the previous old_maxB, beta falls back
  * to BETA_MIN with modeswitch cleared.  Otherwise, when modeswitch is set,
  * minRTT exceeds 10 ms and maxRTT is non-zero, beta becomes
  * (minRTT << 7) / maxRTT clamped to [BETA_MIN, BETA_MAX]; in every other
  * case beta is BETA_MIN and modeswitch is set for the next round.
  */
 static inline void
 htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
 {
 	if (use_bandwidth_switch) {
 		uint32_t cur_maxB = ca->maxB;
 		uint32_t prev_maxB = ca->old_maxB;
 
 		ca->old_maxB = ca->maxB;
 
 		if (!between(5 * cur_maxB, 4 * prev_maxB, 6 * prev_maxB)) {
 			ca->modeswitch = 0;
 			ca->beta = BETA_MIN;
 			return;
 		}
 	}
 	if (!ca->modeswitch || !(minRTT > (uint32_t) MSEC_TO_TICKS(10)) || !maxRTT) {
 		ca->modeswitch = 1;
 		ca->beta = BETA_MIN;
 		return;
 	}
 	ca->beta = (minRTT << 7) / maxRTT;
 	if (ca->beta < BETA_MIN) {
 		ca->beta = BETA_MIN;
 	} else if (ca->beta > BETA_MAX) {
 		ca->beta = BETA_MAX;
 	}
 }
 
 /*
  * Recompute the H-TCP increase factor alpha.
  *
  * The base factor is 1 and grows with the time since the last congestion
  * event once that exceeds one second (hz ticks).  With use_rtt_scaling on
  * and a known minRTT the factor is divided by hz / (10 * minRTT), a ratio
  * kept in 3-bit fixed point and clamped to [1U << 2, 10U << 3].  alpha is
  * then 2 * factor * (128 - beta), or ALPHA_BASE if that comes out as zero.
  */
 static inline void
 htcp_alpha_update(struct htcp *ca)
 {
 	uint32_t rtt_floor = ca->minRTT;
 	uint32_t gain = 1;
 	uint32_t elapsed = htcp_cong_time(ca);
 
 	if (elapsed > (uint32_t) hz) {
 		elapsed -= hz;
 		gain = 1 + (10 * elapsed + ((elapsed / 2) * (elapsed / 2) / hz)) / hz;
 	}
 	if (use_rtt_scaling && rtt_floor) {
 		uint32_t ratio = (hz << 3) / (10 * rtt_floor);
 
 		/* clamp the ratio to [1U << 2, 10U << 3] */
 		if (ratio < (1U << 2)) {
 			ratio = 1U << 2;
 		}
 		if (ratio > (10U << 3)) {
 			ratio = 10U << 3;
 		}
 		gain = (gain << 3) / ratio;
 		if (!gain) {
 			gain = 1;
 		}
 	}
 	ca->alpha = 2 * gain * ((1 << 7) - ca->beta);
 	if (!ca->alpha) {
 		ca->alpha = ALPHA_BASE;
 	}
 }
 
 /* After we have the rtt data to calculate beta, we'd still prefer to wait one
  * rtt before we adjust our beta to ensure we are working from a consistent
  * data.
  *
  * This function should be called when we hit a congestion event since only at
  * that point do we really have a real sense of maxRTT (the queues en route
  * were getting just too full now).
  */
 static void
 htcp_param_update(struct sctp_nets *net)
 {
 	uint32_t minRTT = net->cc_mod.htcp_ca.minRTT;
 	uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT;
 
 	htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT);
 	htcp_alpha_update(&net->cc_mod.htcp_ca);
 
 	/*
 	 * add slowly fading memory for maxRTT to accommodate routing
 	 * changes etc
 	 */
 	if (minRTT > 0 && maxRTT > minRTT)
 		net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
 }
 
 /*
  * Refresh the H-TCP parameters and return the new slow-start threshold:
  * cwnd (counted in whole MTUs) scaled by beta / 128 and converted back to
  * bytes, but never less than two MTUs.
  */
 static uint32_t
 htcp_recalc_ssthresh(struct sctp_nets *net)
 {
 	/* beta must be current before it is applied below */
 	htcp_param_update(net);
 	return (max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu));
 }
 
 static void
 htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/*-
 	 * How to handle these functions?
          *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
 	 *		return;
 	 */
 	if (net->cwnd <= net->ssthresh) {
 		/* We are in slow start */
 		if (net->flight_size + net->net_ack >= net->cwnd) {
 			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
 				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					sctp_log_cwnd(stcb, net, net->mtu,
 					    SCTP_CWND_LOG_FROM_SS);
 				}
 			} else {
 				net->cwnd += net->net_ack;
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 					sctp_log_cwnd(stcb, net, net->net_ack,
 					    SCTP_CWND_LOG_FROM_SS);
 				}
 			}
 			sctp_enforce_cwnd_limit(&stcb->asoc, net);
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->net_ack,
 				    SCTP_CWND_LOG_NOADV_SS);
 			}
 		}
 	} else {
 		measure_rtt(net);
 
 		/*
 		 * In dangerous area, increase slowly. In theory this is
 		 * net->cwnd += alpha / net->cwnd
 		 */
 		/* What is snd_cwnd_cnt?? */
 		if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
 			/*-
 			 * Does SCTP have a cwnd clamp?
 			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
 			 */
 			net->cwnd += net->mtu;
 			net->partial_bytes_acked = 0;
 			sctp_enforce_cwnd_limit(&stcb->asoc, net);
 			htcp_alpha_update(&net->cc_mod.htcp_ca);
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_FROM_CA);
 			}
 		} else {
 			net->partial_bytes_acked += net->net_ack;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->net_ack,
 				    SCTP_CWND_LOG_NOADV_CA);
 			}
 		}
 
 		net->cc_mod.htcp_ca.bytes_acked = net->mtu;
 	}
 }
 
 #ifdef SCTP_NOT_USED
 /*
  * Lower bound on congestion window: simply the destination's current
  * ssthresh.  stcb is not used.  Only compiled when SCTP_NOT_USED is defined.
  */
 static uint32_t
 htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	return (net->ssthresh);
 }
 
 #endif
 
 static void
 htcp_init(struct sctp_nets *net)
 {
 	memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp));
 	net->cc_mod.htcp_ca.alpha = ALPHA_BASE;
 	net->cc_mod.htcp_ca.beta = BETA_MIN;
 	net->cc_mod.htcp_ca.bytes_acked = net->mtu;
 	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
 }
 
 /*
  * Initial congestion-control setup for the H-TCP module.
  *
  * cwnd starts at the larger of two MTUs and SCTP_INITIAL_CWND, capped at
  * four MTUs; ssthresh starts at the peer's advertised receive window.  The
  * window is then clamped by sctp_enforce_cwnd_limit(), the H-TCP state is
  * reset, and the initial value is optionally logged.
  */
 static void
 sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	net->ssthresh = stcb->asoc.peers_rwnd;
 	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	htcp_init(net);
 
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
 	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
 		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
 	}
 }
 
 /*
  * SACK hook for the H-TCP module.
  *
  * Visits every destination of the association.  Destinations with
  * net_ack == 0 are only logged.  For the others, when the cumulative ack
  * moved (or CMT is on and the pseudo-cumack moved), the window is grown by
  * htcp_cong_avoid() and the throughput estimate is refreshed by
  * measure_achieved_throughput().  reneged_all is unused.
  */
 static void
 sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
     struct sctp_association *asoc,
     int accum_moved, int reneged_all SCTP_UNUSED, int will_exit)
 {
 	struct sctp_nets *net;
 
 	/******************************/
 	/* update cwnd and Early FR   */
 	/******************************/
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code. Need to debug.
 		 */
 		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
 			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
 			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
 				net->will_exit_fast_recovery = 1;
 			}
 		}
 #endif
 		/* if nothing was acked on this destination skip it */
 		if (net->net_ack == 0) {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
 			}
 			continue;
 		}
 #ifdef JANA_CMT_FAST_RECOVERY
 		/*
 		 * CMT fast recovery code
 		 */
 		/*
 		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
 		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
 		 * } else if (sctp_cmt_on_off == 0 &&
 		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
 		 */
 #endif
 
 		if (asoc->fast_retran_loss_recovery &&
 		    will_exit == 0 &&
 		    (asoc->sctp_cmt_on_off == 0)) {
 			/*
 			 * If we are in loss recovery we skip any cwnd
 			 * update.  The condition only reads association
 			 * state, so this return ends the walk for all
 			 * remaining destinations as well.
 			 */
 			return;
 		}
 		/*
 		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
 		 * moved.
 		 */
 		if (accum_moved ||
 		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
 			htcp_cong_avoid(stcb, net);
 			measure_achieved_throughput(net);
 		} else {
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
 				sctp_log_cwnd(stcb, net, net->mtu,
 				    SCTP_CWND_LOG_NO_CUMACK);
 			}
 		}
 	}
 }
 
 /*
  * Fast-retransmit hook for the H-TCP module.
  *
  * For every destination with net_ack > 0: if the association is not already
  * in fast recovery (or CMT is on), record a congestion event, set both
  * ssthresh and cwnd from htcp_recalc_ssthresh(), open a fast recovery window
  * on the association and the destination, and restart the send timer.
  * Otherwise only the sctps_fastretransinrtt statistic is bumped.
  */
 static void
 sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
     struct sctp_association *asoc)
 {
 	struct sctp_nets *net;
 
 	/*
 	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
 	 * (net->fast_retran_loss_recovery == 0)))
 	 */
 	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
 		struct sctp_tmit_chunk *head;
 		int cwnd_before;
 
 		if (net->net_ack <= 0) {
 			/* nothing to do for this destination */
 			continue;
 		}
 		if ((asoc->fast_retran_loss_recovery != 0) &&
 		    (asoc->sctp_cmt_on_off <= 0)) {
 			/*
 			 * Mark a peg that we WOULD have done a cwnd
 			 * reduction but RFC2582 prevented this action.
 			 */
 			SCTP_STAT_INCR(sctps_fastretransinrtt);
 			continue;
 		}
 		/*
 		 * Per section 7.2.3: this destination had a fast
 		 * retransmit, so ssthresh and cwnd are adjusted.
 		 */
 		cwnd_before = net->cwnd;
 
 		/* JRS - reset as if state were changed */
 		htcp_reset(&net->cc_mod.htcp_ca);
 		net->ssthresh = htcp_recalc_ssthresh(net);
 		net->cwnd = net->ssthresh;
 		sctp_enforce_cwnd_limit(asoc, net);
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 			sctp_log_cwnd(stcb, net, (net->cwnd - cwnd_before),
 			    SCTP_CWND_LOG_FROM_FR);
 		}
 		head = TAILQ_FIRST(&asoc->send_queue);
 
 		net->partial_bytes_acked = 0;
 		/* Turn on the fast recovery window, association-wide ... */
 		asoc->fast_retran_loss_recovery = 1;
 		/* ... and per destination (CMT fast recovery). */
 		net->fast_retran_loss_recovery = 1;
 
 		/* Mark the end of the window. */
 		if (head != NULL) {
 			asoc->fast_recovery_tsn = head->rec.data.TSN_seq - 1;
 			net->fast_recovery_tsn = head->rec.data.TSN_seq - 1;
 		} else {
 			asoc->fast_recovery_tsn = asoc->sending_seq - 1;
 			net->fast_recovery_tsn = asoc->sending_seq - 1;
 		}
 
 		sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net,
 		    SCTP_FROM_SCTP_CC_FUNCTIONS + SCTP_LOC_3);
 		sctp_timer_start(SCTP_TIMER_TYPE_SEND,
 		    stcb->sctp_ep, stcb, net);
 	}
 }
 
 /*
  * Retransmission-timeout hook for the H-TCP module: record a congestion
  * event, recompute ssthresh, collapse cwnd to a single MTU, clear
  * partial_bytes_acked, and optionally log the change.
  */
 static void
 sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	const int cwnd_before = net->cwnd;
 
 	/* JRS - reset as if the state were being changed to timeout */
 	htcp_reset(&net->cc_mod.htcp_ca);
 	net->ssthresh = htcp_recalc_ssthresh(net);
 	net->partial_bytes_acked = 0;
 	net->cwnd = net->mtu;
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, net->cwnd - cwnd_before, SCTP_CWND_LOG_FROM_RTX);
 	}
 }
 
 /*
  * ECN-echo hook for the H-TCP module.
  *
  * Only acts when in_window is 0: a congestion event is recorded, the
  * sctps_ecnereducedcwnd statistic is bumped, and ssthresh is recomputed.
  * If ssthresh comes out below one MTU it is raised to one MTU and the RTO
  * is doubled.  cwnd then follows ssthresh, is clamped, and the change is
  * optionally logged.  num_pkt_lost is unused.
  */
 static void
 sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
     struct sctp_nets *net, int in_window, int num_pkt_lost SCTP_UNUSED)
 {
 	int cwnd_before;
 
 	if (in_window != 0) {
 		return;
 	}
 	cwnd_before = net->cwnd;
 
 	/* JRS - reset htcp as if state changed */
 	htcp_reset(&net->cc_mod.htcp_ca);
 	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
 	net->ssthresh = htcp_recalc_ssthresh(net);
 	if (net->ssthresh < net->mtu) {
 		net->ssthresh = net->mtu;
 		/* here back off the timer as well, to slow us down */
 		net->RTO <<= 1;
 	}
 	net->cwnd = net->ssthresh;
 	sctp_enforce_cwnd_limit(&stcb->asoc, net);
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
 		sctp_log_cwnd(stcb, net, (net->cwnd - cwnd_before), SCTP_CWND_LOG_FROM_SAT);
 	}
 }
 
 /*
  * Table of congestion-control modules.  Each entry binds the callback set
  * of one algorithm; callbacks an entry does not name are left
  * zero-initialised.
  */
 struct sctp_cc_functions sctp_cc_functions[] = {
 	/* Entry 0: the common callbacks, with no algorithm-specific override. */
 	{
 		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
 		.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
 		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
 		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
 		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
 		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
 		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
 		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
 	},
 	/* Entry 1: HS-TCP; overrides the SACK and fast-retransmit callbacks. */
 	{
 		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
 		.sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack,
 		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
 		.sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr,
 		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
 		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
 		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
 		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
 	},
 	/*
 	 * Entry 2: H-TCP; overrides the initial-parameter, SACK,
 	 * fast-retransmit, timeout and ECN-echo callbacks.
 	 */
 	{
 		.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
 		.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
 		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
 		.sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr,
 		.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
 		.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
 		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
 		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
 	},
 	/*
 	 * Entry 3: RTCC; overrides the initial-parameter, SACK and ECN-echo
 	 * callbacks and is the only entry that also provides the
 	 * packet-transmitted, TSN-acknowledged, new-transmission,
 	 * prepare-for-SACK, socket-option and RTT-calculated callbacks.
 	 */
 	{
 		.sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param,
 		.sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack,
 		.sctp_cwnd_update_exit_pf = sctp_cwnd_update_exit_pf_common,
 		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
 		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
 		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo,
 		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
 		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
 		.sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted,
 		.sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged,
 		.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins,
 		.sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack,
 		.sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option,
 		.sctp_rtt_calculated = sctp_rtt_rtcc_calculated
 	}
 };
Index: head/sys/security/mac/mac_framework.c
===================================================================
--- head/sys/security/mac/mac_framework.c	(revision 292383)
+++ head/sys/security/mac/mac_framework.c	(revision 292384)
@@ -1,596 +1,596 @@
 /*-
  * Copyright (c) 1999-2002, 2006, 2009 Robert N. M. Watson
  * Copyright (c) 2001 Ilmar S. Habibulin
  * Copyright (c) 2001-2005 Networks Associates Technology, Inc.
  * Copyright (c) 2005-2006 SPARTA, Inc.
  * Copyright (c) 2008-2009 Apple Inc.
  * All rights reserved.
  *
  * This software was developed by Robert Watson and Ilmar Habibulin for the
  * TrustedBSD Project.
  *
  * This software was developed for the FreeBSD Project in part by Network
  * Associates Laboratories, the Security Research Division of Network
  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
  * as part of the DARPA CHATS research program.
  *
  * This software was enhanced by SPARTA ISSO under SPAWAR contract 
  * N66001-04-C-6019 ("SEFOS").
  *
  * This software was developed at the University of Cambridge Computer
  * Laboratory with support from a grant from Google, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*-
  * Framework for extensible kernel access control.  This file contains core
  * kernel infrastructure for the TrustedBSD MAC Framework, including policy
  * registration, versioning, locking, error composition operator, and system
  * calls.
  *
  * The MAC Framework implements three programming interfaces:
  *
  * - The kernel MAC interface, defined in mac_framework.h, and invoked
  *   throughout the kernel to request security decisions, notify of security
  *   related events, etc.
  *
  * - The MAC policy module interface, defined in mac_policy.h, which is
  *   implemented by MAC policy modules and invoked by the MAC Framework to
  *   forward kernel security requests and notifications to policy modules.
  *
  * - The user MAC API, defined in mac.h, which allows user programs to query
  *   and set label state on objects.
  *
  * The majority of the MAC Framework implementation may be found in
  * src/sys/security/mac.  Sample policy modules may be found in
  * src/sys/security/mac_*.
  */
 
 #include "opt_mac.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/condvar.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mac.h>
 #include <sys/module.h>
 #include <sys/rmlock.h>
 #include <sys/sdt.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #include <security/mac/mac_framework.h>
 #include <security/mac/mac_internal.h>
 #include <security/mac/mac_policy.h>
 
 /*
  * DTrace SDT providers for MAC.
  */
 SDT_PROVIDER_DEFINE(mac);
 SDT_PROVIDER_DEFINE(mac_framework);
 
-SDT_PROBE_DEFINE2(mac, kernel, policy, modevent, "int",
+SDT_PROBE_DEFINE2(mac, , policy, modevent, "int",
     "struct mac_policy_conf *");
-SDT_PROBE_DEFINE1(mac, kernel, policy, register,
+SDT_PROBE_DEFINE1(mac, , policy, register,
     "struct mac_policy_conf *");
-SDT_PROBE_DEFINE1(mac, kernel, policy, unregister,
+SDT_PROBE_DEFINE1(mac, , policy, unregister,
     "struct mac_policy_conf *");
 
 /*
  * Root sysctl node for all MAC and MAC policy controls.
  */
 SYSCTL_NODE(_security, OID_AUTO, mac, CTLFLAG_RW, 0,
     "TrustedBSD MAC policy controls");
 
 /*
  * Declare that the kernel provides MAC support, version 3 (FreeBSD 7.x).
  * This permits modules to refuse to be loaded if the necessary support isn't
  * present, even if it's pre-boot.
  */
 MODULE_VERSION(kernel_mac_support, MAC_VERSION);
 
 static unsigned int	mac_version = MAC_VERSION;
 SYSCTL_UINT(_security_mac, OID_AUTO, version, CTLFLAG_RD, &mac_version, 0,
     "");
 
 /*
  * Labels consist of a indexed set of "slots", which are allocated policies
  * as required.  The MAC Framework maintains a bitmask of slots allocated so
  * far to prevent reuse.  Slots cannot be reused, as the MAC Framework
  * guarantees that newly allocated slots in labels will be NULL unless
  * otherwise initialized, and because we do not have a mechanism to garbage
  * collect slots on policy unload.  As labeled policies tend to be statically
  * loaded during boot, and not frequently unloaded and reloaded, this is not
  * generally an issue.
  */
 #if MAC_MAX_SLOTS > 32
 #error "MAC_MAX_SLOTS too large"
 #endif
 
 static unsigned int mac_max_slots = MAC_MAX_SLOTS;
 static unsigned int mac_slot_offsets_free = (1 << MAC_MAX_SLOTS) - 1;
 SYSCTL_UINT(_security_mac, OID_AUTO, max_slots, CTLFLAG_RD, &mac_max_slots,
     0, "");
 
 /*
  * Has the kernel started generating labeled objects yet?  All read/write
  * access to this variable is serialized during the boot process.  Following
  * the end of serialization, we don't update this flag; no locking.
  */
 static int	mac_late = 0;
 
 /*
  * Each policy declares a mask of object types requiring labels to be
  * allocated for them.  For convenience, we combine and cache the bitwise or
  * of the per-policy object flags to track whether we will allocate a label
  * for an object type at run-time.
  */
 uint64_t	mac_labeled;
 SYSCTL_UQUAD(_security_mac, OID_AUTO, labeled, CTLFLAG_RD, &mac_labeled, 0,
     "Mask of object types being labeled");
 
 MALLOC_DEFINE(M_MACTEMP, "mactemp", "MAC temporary label storage");
 
 /*
  * MAC policy modules are placed in one of two lists: mac_static_policy_list,
  * for policies that are loaded early and cannot be unloaded, and
  * mac_policy_list, which holds policies either loaded later in the boot
  * cycle or that may be unloaded.  The static policy list does not require
  * locks to iterate over, but the dynamic list requires synchronization.
  * Support for dynamic policy loading can be compiled out using the
  * MAC_STATIC kernel option.
  *
  * The dynamic policy list is protected by two locks: modifying the list
  * requires both locks to be held exclusively.  One of the locks,
  * mac_policy_rm, is acquired over policy entry points that will never sleep;
  * the other, mac_policy_sx, is acquire over policy entry points that may
  * sleep.  The former category will be used when kernel locks may be held
  * over calls to the MAC Framework, during network processing in ithreads,
  * etc.  The latter will tend to involve potentially blocking memory
  * allocations, extended attribute I/O, etc.
  */
 #ifndef MAC_STATIC
 static struct rmlock mac_policy_rm;	/* Non-sleeping entry points. */
 static struct sx mac_policy_sx;		/* Sleeping entry points. */
 #endif
 
 struct mac_policy_list_head mac_policy_list;
 struct mac_policy_list_head mac_static_policy_list;
 u_int mac_policy_count;			/* Registered policy count. */
 
 static void	mac_policy_xlock(void);
 static void	mac_policy_xlock_assert(void);
 static void	mac_policy_xunlock(void);
 
 void
 mac_policy_slock_nosleep(struct rm_priotracker *tracker)
 {
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	rm_rlock(&mac_policy_rm, tracker);
 #endif
 }
 
 void
 mac_policy_slock_sleep(void)
 {
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
  	    "mac_policy_slock_sleep");
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	sx_slock(&mac_policy_sx);
 #endif
 }
 
 void
 mac_policy_sunlock_nosleep(struct rm_priotracker *tracker)
 {
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	rm_runlock(&mac_policy_rm, tracker);
 #endif
 }
 
 void
 mac_policy_sunlock_sleep(void)
 {
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	sx_sunlock(&mac_policy_sx);
 #endif
 }
 
 static void
 mac_policy_xlock(void)
 {
 
 	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
  	    "mac_policy_xlock()");
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	sx_xlock(&mac_policy_sx);
 	rm_wlock(&mac_policy_rm);
 #endif
 }
 
 static void
 mac_policy_xunlock(void)
 {
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	rm_wunlock(&mac_policy_rm);
 	sx_xunlock(&mac_policy_sx);
 #endif
 }
 
 static void
 mac_policy_xlock_assert(void)
 {
 
 #ifndef MAC_STATIC
 	if (!mac_late)
 		return;
 
 	/* XXXRW: rm_assert(&mac_policy_rm, RA_WLOCKED); */
 	sx_assert(&mac_policy_sx, SA_XLOCKED);
 #endif
 }
 
 /*
  * Initialize the MAC subsystem, including appropriate SMP locks.
  */
 static void
 mac_init(void)
 {
 
 	LIST_INIT(&mac_static_policy_list);
 	LIST_INIT(&mac_policy_list);
 	mac_labelzone_init();
 
 #ifndef MAC_STATIC
 	rm_init_flags(&mac_policy_rm, "mac_policy_rm", RM_NOWITNESS |
 	    RM_RECURSE);
 	sx_init_flags(&mac_policy_sx, "mac_policy_sx", SX_NOWITNESS);
 #endif
 }
 
 /*
  * For the purposes of modules that want to know if they were loaded "early",
  * set the mac_late flag once we've processed modules either linked into the
  * kernel, or loaded before the kernel startup.
  */
 static void
 mac_late_init(void)
 {
 
 	mac_late = 1;
 }
 
 /*
  * Given a policy, derive from its set of non-NULL label init methods what
  * object types the policy is interested in.
  */
 static uint64_t
 mac_policy_getlabeled(struct mac_policy_conf *mpc)
 {
 	uint64_t labeled;
 
 #define	MPC_FLAG(method, flag)					\
 	if (mpc->mpc_ops->mpo_ ## method != NULL)			\
 		labeled |= (flag);					\
 
 	labeled = 0;
 	MPC_FLAG(cred_init_label, MPC_OBJECT_CRED);
 	MPC_FLAG(proc_init_label, MPC_OBJECT_PROC);
 	MPC_FLAG(vnode_init_label, MPC_OBJECT_VNODE);
 	MPC_FLAG(inpcb_init_label, MPC_OBJECT_INPCB);
 	MPC_FLAG(socket_init_label, MPC_OBJECT_SOCKET);
 	MPC_FLAG(devfs_init_label, MPC_OBJECT_DEVFS);
 	MPC_FLAG(mbuf_init_label, MPC_OBJECT_MBUF);
 	MPC_FLAG(ipq_init_label, MPC_OBJECT_IPQ);
 	MPC_FLAG(ifnet_init_label, MPC_OBJECT_IFNET);
 	MPC_FLAG(bpfdesc_init_label, MPC_OBJECT_BPFDESC);
 	MPC_FLAG(pipe_init_label, MPC_OBJECT_PIPE);
 	MPC_FLAG(mount_init_label, MPC_OBJECT_MOUNT);
 	MPC_FLAG(posixsem_init_label, MPC_OBJECT_POSIXSEM);
 	MPC_FLAG(posixshm_init_label, MPC_OBJECT_POSIXSHM);
 	MPC_FLAG(sysvmsg_init_label, MPC_OBJECT_SYSVMSG);
 	MPC_FLAG(sysvmsq_init_label, MPC_OBJECT_SYSVMSQ);
 	MPC_FLAG(sysvsem_init_label, MPC_OBJECT_SYSVSEM);
 	MPC_FLAG(sysvshm_init_label, MPC_OBJECT_SYSVSHM);
 	MPC_FLAG(syncache_init_label, MPC_OBJECT_SYNCACHE);
 	MPC_FLAG(ip6q_init_label, MPC_OBJECT_IP6Q);
 
 #undef MPC_FLAG
 	return (labeled);
 }
 
 /*
  * When policies are loaded or unloaded, walk the list of registered policies
  * and built mac_labeled, a bitmask representing the union of all objects
  * requiring labels across all policies.
  */
 static void
 mac_policy_update(void)
 {
 	struct mac_policy_conf *mpc;
 
 	mac_policy_xlock_assert();
 
 	mac_labeled = 0;
 	mac_policy_count = 0;
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {
 		mac_labeled |= mac_policy_getlabeled(mpc);
 		mac_policy_count++;
 	}
 	LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {
 		mac_labeled |= mac_policy_getlabeled(mpc);
 		mac_policy_count++;
 	}
 }
 
 static int
 mac_policy_register(struct mac_policy_conf *mpc)
 {
 	struct mac_policy_conf *tmpc;
 	int error, slot, static_entry;
 
 	error = 0;
 
 	/*
 	 * We don't technically need exclusive access while !mac_late, but
 	 * hold it for assertion consistency.
 	 */
 	mac_policy_xlock();
 
 	/*
 	 * If the module can potentially be unloaded, or we're loading late,
 	 * we have to stick it in the non-static list and pay an extra
 	 * performance overhead.  Otherwise, we can pay a light locking cost
 	 * and stick it in the static list.
 	 */
 	static_entry = (!mac_late &&
 	    !(mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_UNLOADOK));
 
 	if (static_entry) {
 		LIST_FOREACH(tmpc, &mac_static_policy_list, mpc_list) {
 			if (strcmp(tmpc->mpc_name, mpc->mpc_name) == 0) {
 				error = EEXIST;
 				goto out;
 			}
 		}
 	} else {
 		LIST_FOREACH(tmpc, &mac_policy_list, mpc_list) {
 			if (strcmp(tmpc->mpc_name, mpc->mpc_name) == 0) {
 				error = EEXIST;
 				goto out;
 			}
 		}
 	}
 	if (mpc->mpc_field_off != NULL) {
 		slot = ffs(mac_slot_offsets_free);
 		if (slot == 0) {
 			error = ENOMEM;
 			goto out;
 		}
 		slot--;
 		mac_slot_offsets_free &= ~(1 << slot);
 		*mpc->mpc_field_off = slot;
 	}
 	mpc->mpc_runtime_flags |= MPC_RUNTIME_FLAG_REGISTERED;
 
 	/*
 	 * If we're loading a MAC module after the framework has initialized,
 	 * it has to go into the dynamic list.  If we're loading it before
 	 * we've finished initializing, it can go into the static list with
 	 * weaker locker requirements.
 	 */
 	if (static_entry)
 		LIST_INSERT_HEAD(&mac_static_policy_list, mpc, mpc_list);
 	else
 		LIST_INSERT_HEAD(&mac_policy_list, mpc, mpc_list);
 
 	/*
 	 * Per-policy initialization.  Currently, this takes place under the
 	 * exclusive lock, so policies must not sleep in their init method.
 	 * In the future, we may want to separate "init" from "start", with
 	 * "init" occuring without the lock held.  Likewise, on tear-down,
 	 * breaking out "stop" from "destroy".
 	 */
 	if (mpc->mpc_ops->mpo_init != NULL)
 		(*(mpc->mpc_ops->mpo_init))(mpc);
 	mac_policy_update();
 
-	SDT_PROBE(mac, kernel, policy, register, mpc, 0, 0, 0, 0);
+	SDT_PROBE1(mac, , policy, register, mpc);
 	printf("Security policy loaded: %s (%s)\n", mpc->mpc_fullname,
 	    mpc->mpc_name);
 
 out:
 	mac_policy_xunlock();
 	return (error);
 }
 
 static int
 mac_policy_unregister(struct mac_policy_conf *mpc)
 {
 
 	/*
 	 * If we fail the load, we may get a request to unload.  Check to see
 	 * if we did the run-time registration, and if not, silently succeed.
 	 */
 	mac_policy_xlock();
 	if ((mpc->mpc_runtime_flags & MPC_RUNTIME_FLAG_REGISTERED) == 0) {
 		mac_policy_xunlock();
 		return (0);
 	}
 #if 0
 	/*
 	 * Don't allow unloading modules with private data.
 	 */
 	if (mpc->mpc_field_off != NULL) {
 		mac_policy_xunlock();
 		return (EBUSY);
 	}
 #endif
 	/*
 	 * Only allow the unload to proceed if the module is unloadable by
 	 * its own definition.
 	 */
 	if ((mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_UNLOADOK) == 0) {
 		mac_policy_xunlock();
 		return (EBUSY);
 	}
 	if (mpc->mpc_ops->mpo_destroy != NULL)
 		(*(mpc->mpc_ops->mpo_destroy))(mpc);
 
 	LIST_REMOVE(mpc, mpc_list);
 	mpc->mpc_runtime_flags &= ~MPC_RUNTIME_FLAG_REGISTERED;
 	mac_policy_update();
 	mac_policy_xunlock();
 
-	SDT_PROBE(mac, kernel, policy, unregister, mpc, 0, 0, 0, 0);
+	SDT_PROBE1(mac, , policy, unregister, mpc);
 	printf("Security policy unload: %s (%s)\n", mpc->mpc_fullname,
 	    mpc->mpc_name);
 
 	return (0);
 }
 
 /*
  * Allow MAC policy modules to register during boot, etc.
  */
 int
 mac_policy_modevent(module_t mod, int type, void *data)
 {
 	struct mac_policy_conf *mpc;
 	int error;
 
 	error = 0;
 	mpc = (struct mac_policy_conf *) data;
 
 #ifdef MAC_STATIC
 	if (mac_late) {
 		printf("mac_policy_modevent: MAC_STATIC and late\n");
 		return (EBUSY);
 	}
 #endif
 
-	SDT_PROBE(mac, kernel, policy, modevent, type, mpc, 0, 0, 0);
+	SDT_PROBE2(mac, , policy, modevent, type, mpc);
 	switch (type) {
 	case MOD_LOAD:
 		if (mpc->mpc_loadtime_flags & MPC_LOADTIME_FLAG_NOTLATE &&
 		    mac_late) {
 			printf("mac_policy_modevent: can't load %s policy "
 			    "after booting\n", mpc->mpc_name);
 			error = EBUSY;
 			break;
 		}
 		error = mac_policy_register(mpc);
 		break;
 	case MOD_UNLOAD:
 		/* Don't unregister the module if it was never registered. */
 		if ((mpc->mpc_runtime_flags & MPC_RUNTIME_FLAG_REGISTERED)
 		    != 0)
 			error = mac_policy_unregister(mpc);
 		else
 			error = 0;
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Define an error value precedence, and given two arguments, selects the
  * value with the higher precedence.
  */
 int
 mac_error_select(int error1, int error2)
 {
 
 	/* Certain decision-making errors take top priority. */
 	if (error1 == EDEADLK || error2 == EDEADLK)
 		return (EDEADLK);
 
 	/* Invalid arguments should be reported where possible. */
 	if (error1 == EINVAL || error2 == EINVAL)
 		return (EINVAL);
 
 	/* Precedence goes to "visibility", with both process and file. */
 	if (error1 == ESRCH || error2 == ESRCH)
 		return (ESRCH);
 
 	if (error1 == ENOENT || error2 == ENOENT)
 		return (ENOENT);
 
 	/* Precedence goes to DAC/MAC protections. */
 	if (error1 == EACCES || error2 == EACCES)
 		return (EACCES);
 
 	/* Precedence goes to privilege. */
 	if (error1 == EPERM || error2 == EPERM)
 		return (EPERM);
 
 	/* Precedence goes to error over success; otherwise, arbitrary. */
 	if (error1 != 0)
 		return (error1);
 	return (error2);
 }
 
 int
 mac_check_structmac_consistent(struct mac *mac)
 {
 
 	if (mac->m_buflen > MAC_MAX_LABEL_BUF_LEN)
 		return (EINVAL);
 
 	return (0);
 }
 
 SYSINIT(mac, SI_SUB_MAC, SI_ORDER_FIRST, mac_init, NULL);
 SYSINIT(mac_late, SI_SUB_MAC_LATE, SI_ORDER_FIRST, mac_late_init, NULL);
Index: head/sys/security/mac/mac_internal.h
===================================================================
--- head/sys/security/mac/mac_internal.h	(revision 292383)
+++ head/sys/security/mac/mac_internal.h	(revision 292384)
@@ -1,516 +1,516 @@
 /*-
  * Copyright (c) 1999-2002, 2006, 2009 Robert N. M. Watson
  * Copyright (c) 2001 Ilmar S. Habibulin
  * Copyright (c) 2001-2004 Networks Associates Technology, Inc.
  * Copyright (c) 2006 nCircle Network Security, Inc.
  * Copyright (c) 2006 SPARTA, Inc.
  * Copyright (c) 2009 Apple, Inc.
  * All rights reserved.
  *
  * This software was developed by Robert Watson and Ilmar Habibulin for the
  * TrustedBSD Project.
  *
  * This software was developed for the FreeBSD Project in part by Network
  * Associates Laboratories, the Security Research Division of Network
  * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
  * as part of the DARPA CHATS research program.
  *
  * This software was developed by Robert N. M. Watson for the TrustedBSD
  * Project under contract to nCircle Network Security, Inc.
  *
  * This software was enhanced by SPARTA ISSO under SPAWAR contract
  * N66001-04-C-6019 ("SEFOS").
  *
  * This software was developed at the University of Cambridge Computer
  * Laboratory with support from a grant from Google, Inc. 
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SECURITY_MAC_MAC_INTERNAL_H_
 #define	_SECURITY_MAC_MAC_INTERNAL_H_
 
 #ifndef _KERNEL
 #error "no user-serviceable parts inside"
 #endif
 
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 
 /*
  * MAC Framework sysctl namespace.
  */
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_security_mac);
 #endif /* SYSCTL_DECL */
 
 /*
  * MAC Framework SDT DTrace probe namespace, macros for declaring entry
  * point probes, macros for invoking them.
  */
 #ifdef SDT_PROVIDER_DECLARE
 SDT_PROVIDER_DECLARE(mac);		/* MAC Framework-level events. */
 SDT_PROVIDER_DECLARE(mac_framework);	/* Entry points to MAC. */
 
 #define	MAC_CHECK_PROBE_DEFINE4(name, arg0, arg1, arg2, arg3)		\
-	SDT_PROBE_DEFINE5(mac_framework, kernel, name, mac__check__err,	\
+	SDT_PROBE_DEFINE5(mac_framework, , name, mac__check__err,	\
 	    "int", arg0, arg1, arg2, arg3);				\
-	SDT_PROBE_DEFINE5(mac_framework, kernel, name, mac__check__ok,	\
+	SDT_PROBE_DEFINE5(mac_framework, , name, mac__check__ok,	\
 	    "int", arg0, arg1, arg2, arg3);
 
 #define	MAC_CHECK_PROBE_DEFINE3(name, arg0, arg1, arg2)			\
-	SDT_PROBE_DEFINE4(mac_framework, kernel, name, mac__check__err,	\
+	SDT_PROBE_DEFINE4(mac_framework, , name, mac__check__err,	\
 	    "int", arg0, arg1, arg2);					\
-	SDT_PROBE_DEFINE4(mac_framework, kernel, name, mac__check__ok,	\
+	SDT_PROBE_DEFINE4(mac_framework, , name, mac__check__ok,	\
 	    "int", arg0, arg1, arg2);
 
 #define	MAC_CHECK_PROBE_DEFINE2(name, arg0, arg1)			\
-	SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__check__err,	\
+	SDT_PROBE_DEFINE3(mac_framework, , name, mac__check__err,	\
 	    "int", arg0, arg1);						\
-	SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__check__ok,	\
+	SDT_PROBE_DEFINE3(mac_framework, , name, mac__check__ok,	\
 	    "int", arg0, arg1);
 
 #define	MAC_CHECK_PROBE_DEFINE1(name, arg0)				\
-	SDT_PROBE_DEFINE2(mac_framework, kernel, name, mac__check__err,	\
+	SDT_PROBE_DEFINE2(mac_framework, , name, mac__check__err,	\
 	    "int", arg0);						\
-	SDT_PROBE_DEFINE2(mac_framework, kernel, name, mac__check__ok,	\
+	SDT_PROBE_DEFINE2(mac_framework, , name, mac__check__ok,	\
 	    "int", arg0);
 
 #define	MAC_CHECK_PROBE4(name, error, arg0, arg1, arg2, arg3)	do {	\
 	if (error) {							\
-		SDT_PROBE(mac_framework, kernel, name, mac__check__err,	\
+		SDT_PROBE5(mac_framework, , name, mac__check__err,	\
 		    error, arg0, arg1, arg2, arg3);			\
 	} else {							\
-		SDT_PROBE(mac_framework, kernel, name, mac__check__ok,	\
+		SDT_PROBE5(mac_framework, , name, mac__check__ok,	\
 		    0, arg0, arg1, arg2, arg3);				\
 	}								\
 } while (0)
 
 #define	MAC_CHECK_PROBE3(name, error, arg0, arg1, arg2)			\
 	MAC_CHECK_PROBE4(name, error, arg0, arg1, arg2, 0)
 #define	MAC_CHECK_PROBE2(name, error, arg0, arg1)			\
 	MAC_CHECK_PROBE3(name, error, arg0, arg1, 0)
 #define	MAC_CHECK_PROBE1(name, error, arg0)				\
 	MAC_CHECK_PROBE2(name, error, arg0, 0)
 #endif
 
 #define	MAC_GRANT_PROBE_DEFINE2(name, arg0, arg1)			\
-	SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__grant__err,	\
+	SDT_PROBE_DEFINE3(mac_framework, , name, mac__grant__err,	\
 	    "int", arg0, arg1);						\
-	SDT_PROBE_DEFINE3(mac_framework, kernel, name, mac__grant__ok,	\
+	SDT_PROBE_DEFINE3(mac_framework, , name, mac__grant__ok,	\
 	    "int", arg0, arg1);
 
 #define	MAC_GRANT_PROBE2(name, error, arg0, arg1)	do {		\
 	if (error) {							\
-		SDT_PROBE(mac_framework, kernel, name, mac__grant__err,	\
-		    error, arg0, arg1, 0, 0);				\
+		SDT_PROBE3(mac_framework, , name, mac__grant__err,	\
+		    error, arg0, arg1);					\
 	} else {							\
-		SDT_PROBE(mac_framework, kernel, name, mac__grant__ok,	\
-		    error, arg0, arg1, 0, 0);				\
+		SDT_PROBE3(mac_framework, , name, mac__grant__ok,	\
+		    error, arg0, arg1);					\
 	}								\
 } while (0)
 
 /*
  * MAC Framework global types and typedefs.
  */
 LIST_HEAD(mac_policy_list_head, mac_policy_conf);
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_MACTEMP);
 #endif
 
 /*
  * MAC labels -- in-kernel storage format.
  *
  * In general, struct label pointers are embedded in kernel data structures
  * representing objects that may be labeled (and protected).  Struct label is
  * opaque to both kernel services that invoke the MAC Framework and MAC
  * policy modules.  In particular, we do not wish to encode the layout of the
  * label structure into any ABIs.  Historically, the slot array contained
  * unions of {long, void} but now contains uintptr_t.
  */
 #define	MAC_MAX_SLOTS	4
 #define	MAC_FLAG_INITIALIZED	0x0000001	/* Is initialized for use. */
 struct label {
 	int		l_flags;
 	intptr_t	l_perpolicy[MAC_MAX_SLOTS];
 };
 
 
 /*
  * Flags for mac_labeled, a bitmask of object types need across the union of
  * all policies currently registered with the MAC Framework, used to key
  * whether or not labels are allocated and constructors for the type are
  * invoked.
  */
 #define	MPC_OBJECT_CRED			0x0000000000000001
 #define	MPC_OBJECT_PROC			0x0000000000000002
 #define	MPC_OBJECT_VNODE		0x0000000000000004
 #define	MPC_OBJECT_INPCB		0x0000000000000008
 #define	MPC_OBJECT_SOCKET		0x0000000000000010
 #define	MPC_OBJECT_DEVFS		0x0000000000000020
 #define	MPC_OBJECT_MBUF			0x0000000000000040
 #define	MPC_OBJECT_IPQ			0x0000000000000080
 #define	MPC_OBJECT_IFNET		0x0000000000000100
 #define	MPC_OBJECT_BPFDESC		0x0000000000000200
 #define	MPC_OBJECT_PIPE			0x0000000000000400
 #define	MPC_OBJECT_MOUNT		0x0000000000000800
 #define	MPC_OBJECT_POSIXSEM		0x0000000000001000
 #define	MPC_OBJECT_POSIXSHM		0x0000000000002000
 #define	MPC_OBJECT_SYSVMSG		0x0000000000004000
 #define	MPC_OBJECT_SYSVMSQ		0x0000000000008000
 #define	MPC_OBJECT_SYSVSEM		0x0000000000010000
 #define	MPC_OBJECT_SYSVSHM		0x0000000000020000
 #define	MPC_OBJECT_SYNCACHE		0x0000000000040000
 #define	MPC_OBJECT_IP6Q			0x0000000000080000
 
 /*
  * MAC Framework global variables.
  */
 extern struct mac_policy_list_head	mac_policy_list;
 extern struct mac_policy_list_head	mac_static_policy_list;
 extern u_int				mac_policy_count;
 extern uint64_t				mac_labeled;
 extern struct mtx			mac_ifnet_mtx;
 
 /*
  * MAC Framework infrastructure functions.
  */
 int	mac_error_select(int error1, int error2);
 
 void	mac_policy_slock_nosleep(struct rm_priotracker *tracker);
 void	mac_policy_slock_sleep(void);
 void	mac_policy_sunlock_nosleep(struct rm_priotracker *tracker);
 void	mac_policy_sunlock_sleep(void);
 
 struct label	*mac_labelzone_alloc(int flags);
 void		 mac_labelzone_free(struct label *label);
 void		 mac_labelzone_init(void);
 
 void	mac_init_label(struct label *label);
 void	mac_destroy_label(struct label *label);
 int	mac_check_structmac_consistent(struct mac *mac);
 int	mac_allocate_slot(void);
 
 #define MAC_IFNET_LOCK(ifp)	mtx_lock(&mac_ifnet_mtx)
 #define MAC_IFNET_UNLOCK(ifp)	mtx_unlock(&mac_ifnet_mtx)
 
 /*
  * MAC Framework per-object type functions.  It's not yet clear how the
  * namespaces, etc, should work for these, so for now, sort by object type.
  */
 struct label	*mac_cred_label_alloc(void);
 void		 mac_cred_label_free(struct label *label);
 struct label	*mac_pipe_label_alloc(void);
 void		 mac_pipe_label_free(struct label *label);
 struct label	*mac_socket_label_alloc(int flag);
 void		 mac_socket_label_free(struct label *label);
 struct label	*mac_vnode_label_alloc(void);
 void		 mac_vnode_label_free(struct label *label);
 
 int	mac_cred_check_relabel(struct ucred *cred, struct label *newlabel);
 int	mac_cred_externalize_label(struct label *label, char *elements,
 	    char *outbuf, size_t outbuflen);
 int	mac_cred_internalize_label(struct label *label, char *string);
 void	mac_cred_relabel(struct ucred *cred, struct label *newlabel);
 
 struct label	*mac_mbuf_to_label(struct mbuf *m);
 
 void	mac_pipe_copy_label(struct label *src, struct label *dest);
 int	mac_pipe_externalize_label(struct label *label, char *elements,
 	    char *outbuf, size_t outbuflen);
 int	mac_pipe_internalize_label(struct label *label, char *string);
 
 int	mac_socket_label_set(struct ucred *cred, struct socket *so,
 	    struct label *label);
 void	mac_socket_copy_label(struct label *src, struct label *dest);
 int	mac_socket_externalize_label(struct label *label, char *elements,
 	    char *outbuf, size_t outbuflen);
 int	mac_socket_internalize_label(struct label *label, char *string);
 
 int	mac_vnode_externalize_label(struct label *label, char *elements,
 	    char *outbuf, size_t outbuflen);
 int	mac_vnode_internalize_label(struct label *label, char *string);
 void	mac_vnode_check_mmap_downgrade(struct ucred *cred, struct vnode *vp,
 	    int *prot);
 int	vn_setlabel(struct vnode *vp, struct label *intlabel,
 	    struct ucred *cred);
 
 /*
  * MAC Framework composition macros invoke all registered MAC policies for a
  * specific entry point.  They come in two forms: one which permits policies
  * to sleep/block, and another that does not.
  *
  * MAC_POLICY_CHECK performs the designated check by walking the policy
  * module list and checking with each as to how it feels about the request.
  * Note that it returns its value via 'error' in the scope of the caller.
  */
 #define	MAC_POLICY_CHECK(check, args...) do {				\
 	struct mac_policy_conf *mpc;					\
 									\
 	error = 0;							\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## check != NULL)		\
 			error = mac_error_select(			\
 			    mpc->mpc_ops->mpo_ ## check (args),		\
 			    error);					\
 	}								\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		mac_policy_slock_sleep();				\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## check != NULL)	\
 				error = mac_error_select(		\
 				    mpc->mpc_ops->mpo_ ## check (args),	\
 				    error);				\
 		}							\
 		mac_policy_sunlock_sleep();				\
 	}								\
 } while (0)
 
 #define	MAC_POLICY_CHECK_NOSLEEP(check, args...) do {			\
 	struct mac_policy_conf *mpc;					\
 									\
 	error = 0;							\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## check != NULL)		\
 			error = mac_error_select(			\
 			    mpc->mpc_ops->mpo_ ## check (args),		\
 			    error);					\
 	}								\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		struct rm_priotracker tracker;				\
 									\
 		mac_policy_slock_nosleep(&tracker);			\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## check != NULL)	\
 				error = mac_error_select(		\
 				    mpc->mpc_ops->mpo_ ## check (args),	\
 				    error);				\
 		}							\
 		mac_policy_sunlock_nosleep(&tracker);			\
 	}								\
 } while (0)
 
 /*
  * MAC_POLICY_GRANT performs the designated check by walking the policy
  * module list and checking with each as to how it feels about the request.
  * Unlike MAC_POLICY_CHECK, it grants if any policies return '0', and
  * otherwise returns EPERM.  Note that it returns its value via 'error' in
  * the scope of the caller.
  */
 #define	MAC_POLICY_GRANT_NOSLEEP(check, args...) do {			\
 	struct mac_policy_conf *mpc;					\
 									\
 	/* Deny unless some policy below explicitly grants. */		\
 	error = EPERM;							\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## check != NULL &&		\
 		    mpc->mpc_ops->mpo_ ## check (args) == 0)		\
 			error = 0;					\
 	}								\
 	/* Every policy is still consulted after a grant is seen. */	\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		struct rm_priotracker tracker;				\
 									\
 		mac_policy_slock_nosleep(&tracker);			\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## check != NULL &&	\
 			    mpc->mpc_ops->mpo_ ## check (args) == 0)	\
 				error = 0;				\
 		}							\
 		mac_policy_sunlock_nosleep(&tracker);			\
 	}								\
 } while (0)
 
 /*
  * MAC_POLICY_BOOLEAN performs the designated boolean composition by walking
  * the module list, invoking each instance of the operation, and combining
  * the results using the passed C operator.  Note that it returns its value
  * via 'result' in the scope of the caller, which should be initialized by
  * the caller in a meaningful way to get a meaningful result.
  */
 #define	MAC_POLICY_BOOLEAN(operation, composition, args...) do {	\
 	struct mac_policy_conf *mpc;					\
 									\
 	/* Policies lacking mpo_<operation> leave 'result' untouched. */\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## operation == NULL)		\
 			continue;					\
 		result = result composition				\
 		    mpc->mpc_ops->mpo_ ## operation (args);		\
 	}								\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		mac_policy_slock_sleep();				\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## operation == NULL)	\
 				continue;				\
 			result = result composition			\
 			    mpc->mpc_ops->mpo_ ## operation (args);	\
 		}							\
 		mac_policy_sunlock_sleep();				\
 	}								\
 } while (0)
 
 /*
  * MAC_POLICY_BOOLEAN_NOSLEEP performs the same composition as
  * MAC_POLICY_BOOLEAN, accumulating into 'result' in the scope of the
  * caller, but brackets the walk of the dynamic policy list with
  * mac_policy_slock_nosleep()/mac_policy_sunlock_nosleep() and a local
  * rm_priotracker rather than with
  * mac_policy_slock_sleep()/mac_policy_sunlock_sleep().
  */
 #define	MAC_POLICY_BOOLEAN_NOSLEEP(operation, composition, args...) do {\
 	struct mac_policy_conf *mpc;					\
 									\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## operation == NULL)		\
 			continue;					\
 		result = result composition				\
 		    mpc->mpc_ops->mpo_ ## operation (args);		\
 	}								\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		struct rm_priotracker tracker;				\
 									\
 		mac_policy_slock_nosleep(&tracker);			\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## operation == NULL)	\
 				continue;				\
 			result = result composition			\
 			    mpc->mpc_ops->mpo_ ## operation (args);	\
 		}							\
 		mac_policy_sunlock_nosleep(&tracker);			\
 	}								\
 } while (0)
 
 /*
  * MAC_POLICY_EXTERNALIZE queries each policy to see if it can generate an
  * externalized version of a label element by name.  Policies declare whether
  * they have matched a particular element name, parsed from the string by
  * MAC_POLICY_EXTERNALIZE, and an error is returned if any element is matched
  * by no policy.
  */
 #define	MAC_POLICY_EXTERNALIZE(type, label, elementlist, outbuf, 	\
     outbuflen) do {							\
 	int claimed, first, ignorenotfound, savedlen;			\
 	char *element_name, *element_temp;				\
 	struct sbuf sb;							\
 									\
 	error = 0;							\
 	first = 1;							\
 	sbuf_new(&sb, outbuf, outbuflen, SBUF_FIXEDLEN);		\
 	for (element_temp = elementlist;				\
 	    (element_name = strsep(&element_temp, ",")) != NULL; ) {	\
 		/* A leading '?' lets the element go unclaimed. */	\
 		ignorenotfound = (element_name[0] == '?');		\
 		if (ignorenotfound)					\
 			element_name++;					\
 		/* Remember where this element starts for rollback. */	\
 		savedlen = sbuf_len(&sb);				\
 		/* Elements after the first are comma-separated. */	\
 		error = sbuf_printf(&sb, first ? "%s/" : ",%s/",	\
 		    element_name);					\
 		if (error == -1) {					\
 			error = EINVAL;	/* XXX: E2BIG? */		\
 			break;						\
 		}							\
 		claimed = 0;						\
 		MAC_POLICY_CHECK(type ## _externalize_label, label,	\
 		    element_name, &sb, &claimed);			\
 		if (error)						\
 			break;						\
 		if (claimed == 1) {					\
 			first = 0;					\
 		} else if (claimed == 0 && ignorenotfound) {		\
 			/* Revert last label name. */			\
 			sbuf_setpos(&sb, savedlen);			\
 		} else {						\
 			error = EINVAL;	/* XXX: ENOLABEL? */		\
 			break;						\
 		}							\
 	}								\
 	sbuf_finish(&sb);						\
 } while (0)
 
 /*
  * MAC_POLICY_INTERNALIZE presents parsed element names and data to each
  * policy to see if any is willing to claim it and internalize the label
  * data.  If no policies match, an error is returned.
  */
 #define	MAC_POLICY_INTERNALIZE(type, label, instring) do {		\
 	char *element, *element_name, *element_data;			\
 	int claimed;							\
 									\
 	error = 0;							\
 	for (element = instring;					\
 	    (element_name = strsep(&element, ",")) != NULL; ) {		\
 		/* Split "name/data" in place at the first '/'. */	\
 		element_data = element_name;				\
 		element_name = strsep(&element_data, "/");		\
 		if (element_data == NULL) {				\
 			/* No '/' separator: malformed element. */	\
 			error = EINVAL;					\
 			break;						\
 		}							\
 		claimed = 0;						\
 		MAC_POLICY_CHECK(type ## _internalize_label, label,	\
 		    element_name, element_data, &claimed);		\
 		/* XXXMAC: Another error here? */			\
 		if (error == 0 && claimed != 1)				\
 			error = EINVAL;					\
 		if (error != 0)						\
 			break;						\
 	}								\
 } while (0)
 
 /*
  * MAC_POLICY_PERFORM performs the designated operation by walking the policy
  * module list and invoking that operation for each policy.
  */
 #define	MAC_POLICY_PERFORM(operation, args...) do {			\
 	struct mac_policy_conf *mpc;					\
 									\
 	/* Policies that do not provide mpo_<operation> are skipped. */	\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## operation == NULL)		\
 			continue;					\
 		mpc->mpc_ops->mpo_ ## operation (args);			\
 	}								\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		mac_policy_slock_sleep();				\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## operation == NULL)	\
 				continue;				\
 			mpc->mpc_ops->mpo_ ## operation (args);		\
 		}							\
 		mac_policy_sunlock_sleep();				\
 	}								\
 } while (0)
 
 /*
  * MAC_POLICY_PERFORM_NOSLEEP invokes mpo_<operation> on every policy that
  * provides it, exactly as MAC_POLICY_PERFORM does, but brackets the walk of
  * the dynamic policy list with
  * mac_policy_slock_nosleep()/mac_policy_sunlock_nosleep() and a local
  * rm_priotracker rather than with
  * mac_policy_slock_sleep()/mac_policy_sunlock_sleep().  Nothing is
  * reported back to the caller.
  */
 #define	MAC_POLICY_PERFORM_NOSLEEP(operation, args...) do {		\
 	struct mac_policy_conf *mpc;					\
 									\
 	LIST_FOREACH(mpc, &mac_static_policy_list, mpc_list) {		\
 		if (mpc->mpc_ops->mpo_ ## operation == NULL)		\
 			continue;					\
 		mpc->mpc_ops->mpo_ ## operation (args);			\
 	}								\
 	if (!LIST_EMPTY(&mac_policy_list)) {				\
 		struct rm_priotracker tracker;				\
 									\
 		mac_policy_slock_nosleep(&tracker);			\
 		LIST_FOREACH(mpc, &mac_policy_list, mpc_list) {		\
 			if (mpc->mpc_ops->mpo_ ## operation == NULL)	\
 				continue;				\
 			mpc->mpc_ops->mpo_ ## operation (args);		\
 		}							\
 		mac_policy_sunlock_nosleep(&tracker);			\
 	}								\
 } while (0)
 
 #endif /* !_SECURITY_MAC_MAC_INTERNAL_H_ */