Index: head/sys/kern/init_main.c
===================================================================
--- head/sys/kern/init_main.c	(revision 285632)
+++ head/sys/kern/init_main.c	(revision 285633)
@@ -1,875 +1,878 @@
 /*-
  * Copyright (c) 1995 Terrence R. Lambert
  * All rights reserved.
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_init_path.h"
 #include "opt_verbose_sysinit.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/exec.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/vnode.h>
 #include <sys/sysent.h>
 #include <sys/reboot.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 #include <sys/unistd.h>
 #include <sys/malloc.h>
 #include <sys/conf.h>
 #include <sys/cpuset.h>
 
 #include <machine/cpu.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_domain.h>
 #include <sys/copyright.h>
 
 #include <ddb/ddb.h>
 #include <ddb/db_sym.h>
 
 void mi_startup(void);				/* Should be elsewhere */
 
 /*
  * Components of the first process -- never freed.
  * These statically allocated objects are wired together by proc0_init();
  * initproc is filled in by create_init() with the process forked for init.
  */
 static struct session session0;
 static struct pgrp pgrp0;
 struct	proc proc0;
 struct	thread thread0 __aligned(16);
 struct	vmspace vmspace0;
 struct	proc *initproc;
 
 #ifndef BOOTHOWTO
 #define	BOOTHOWTO	0
 #endif
 int	boothowto = BOOTHOWTO;	/* initialized so that it can be patched */
 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0,
 	"Boot control flags, passed from loader");
 
 #ifndef BOOTVERBOSE
 #define	BOOTVERBOSE	0
 #endif
 int	bootverbose = BOOTVERBOSE;	/* incremented by mi_startup() on RB_VERBOSE */
 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
 	"Control the output of verbose kernel messages");
 
 /*
  * This ensures that there is at least one entry so that the sysinit_set
  * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
  * executed.
  */
 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL);
 
 /*
  * The sysinit table itself.  Items are checked off as they are run.
  * If we want to register new sysinit types, add them to newsysinit.
  *
  * sysinit/sysinit_end delimit the table mi_startup() is walking;
  * newsysinit/newsysinit_end delimit a merged replacement table built by
  * sysinit_add(), which mi_startup() switches to after the current task.
  */
 SET_DECLARE(sysinit_set, struct sysinit);
 struct sysinit **sysinit, **sysinit_end;
 struct sysinit **newsysinit, **newsysinit_end;
 
 /*
  * Append the sysinit entries in [set, set_end) to the pending table.
  *
  * A fresh array is allocated that holds the entries of the pending table
  * (newsysinit) when one exists, or of the active table (sysinit)
  * otherwise, followed by the entries being added.  The result is
  * published through newsysinit/newsysinit_end and any previous pending
  * array is freed.  The array comes from malloc(9), so this can only be
  * called after malloc is running; an allocation failure panics.
  */
 void
 sysinit_add(struct sysinit **set, struct sysinit **set_end)
 {
 	struct sysinit **base, **limit;
 	struct sysinit **merged;
 	struct sysinit **src;
 	struct sysinit **dst;
 	int total;
 
 	/* Build on the pending table if there is one, else the active one. */
 	if (newsysinit) {
 		base = newsysinit;
 		limit = newsysinit_end;
 	} else {
 		base = sysinit;
 		limit = sysinit_end;
 	}
 
 	total = set_end - set;
 	total += limit - base;
 	merged = malloc(total * sizeof(*merged), M_TEMP, M_NOWAIT);
 	if (merged == NULL)
 		panic("cannot malloc for sysinit");
 
 	/* Existing entries first, then the newly supplied ones. */
 	dst = merged;
 	for (src = base; src < limit; src++)
 		*dst++ = *src;
 	for (src = set; src < set_end; src++)
 		*dst++ = *src;
 
 	/* Drop the superseded pending array and publish the new one. */
 	if (newsysinit)
 		free(newsysinit, M_TEMP);
 	newsysinit = merged;
 	newsysinit_end = merged + total;
 }
 
 #if defined (DDB) && defined(VERBOSE_SYSINIT)
 /*
  * Look up the debugger symbol for address va using the given search
  * strategy.  Returns the symbol's name only when the lookup reports an
  * offset of zero; returns NULL when va is 0 or the offset is non-zero.
  */
 static const char *
 symbol_name(vm_offset_t va, db_strategy_t strategy)
 {
 	const char *symname;
 	c_db_sym_t match;
 	db_expr_t  delta;
 
 	if (va != 0) {
 		match = db_search_symbol(va, strategy, &delta);
 		if (delta == 0) {
 			db_symbol_values(match, &symname, NULL);
 			return (symname);
 		}
 	}
 	return (NULL);
 }
 #endif
 
 /*
  * System startup; initialize the world, create process 0, mount root
  * filesystem, and fork to create init and pagedaemon.  Most of the
  * hard work is done in the lower-level initialization routines including
  * startup(), which does memory initialization and autoconfiguration.
  *
  * This allows simple addition of new kernel subsystems that require
  * boot time initialization.  It also allows substitution of subsystem
  * (for instance, a scheduler, kernel profiler, or VM system) by object
  * module.  Finally, it allows for optional "kernel threads".
  *
  * The function sorts the sysinit table, runs every entry once in
  * (subsystem, order) sequence, and then calls swapper(); it is not
  * expected to return.
  */
 void
 mi_startup(void)
 {
 
 	register struct sysinit **sipp;		/* system initialization*/
 	register struct sysinit **xipp;		/* interior loop of sort*/
 	register struct sysinit *save;		/* bubble*/
 
 #if defined(VERBOSE_SYSINIT)
 	int last;
 	int verbose;
 #endif
 
 	if (boothowto & RB_VERBOSE)
 		bootverbose++;
 
 	/* Default to the linker set unless a table was already installed. */
 	if (sysinit == NULL) {
 		sysinit = SET_BEGIN(sysinit_set);
 		sysinit_end = SET_LIMIT(sysinit_set);
 	}
 
 	/*
 	 * Re-entered after a task has caused new sysinits to be registered:
 	 * the merged table is sorted again and walked from the beginning.
 	 * Entries that already ran are marked SI_SUB_DONE and are skipped.
 	 */
 restart:
 	/*
 	 * Perform a bubble sort of the system initialization objects by
 	 * their subsystem (primary key) and order (secondary key).
 	 */
 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
 			      (*sipp)->order <= (*xipp)->order))
 				continue;	/* skip*/
 			save = *sipp;
 			*sipp = *xipp;
 			*xipp = save;
 		}
 	}
 
 #if defined(VERBOSE_SYSINIT)
 	/* Tracing starts with the first subsystem above SI_SUB_COPYRIGHT. */
 	last = SI_SUB_COPYRIGHT;
 	verbose = 0;
 #if !defined(DDB)
 	printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n");
 #endif
 #endif
 
 	/*
 	 * Traverse the (now) ordered list of system initialization tasks.
 	 * Perform each task, and continue on to the next task.
 	 */
 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
 
 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
 			continue;	/* skip dummy task(s)*/
 
 		if ((*sipp)->subsystem == SI_SUB_DONE)
 			continue;
 
 #if defined(VERBOSE_SYSINIT)
 		if ((*sipp)->subsystem > last) {
 			verbose = 1;
 			last = (*sipp)->subsystem;
 			printf("subsystem %x\n", last);
 		}
 		if (verbose) {
 #if defined(DDB)
 			const char *func, *data;
 
 			/* Prefer symbolic names; fall back to raw pointers. */
 			func = symbol_name((vm_offset_t)(*sipp)->func,
 			    DB_STGY_PROC);
 			data = symbol_name((vm_offset_t)(*sipp)->udata,
 			    DB_STGY_ANY);
 			if (func != NULL && data != NULL)
 				printf("   %s(&%s)... ", func, data);
 			else if (func != NULL)
 				printf("   %s(%p)... ", func, (*sipp)->udata);
 			else
 #endif
 				printf("   %p(%p)... ", (*sipp)->func,
 				    (*sipp)->udata);
 		}
 #endif
 
 		/* Call function */
 		(*((*sipp)->func))((*sipp)->udata);
 
 #if defined(VERBOSE_SYSINIT)
 		if (verbose)
 			printf("done.\n");
 #endif
 
 		/* Check off the one we're just done */
 		(*sipp)->subsystem = SI_SUB_DONE;
 
 		/* Check if we've installed more sysinit items via KLD */
 		if (newsysinit != NULL) {
 			/*
 			 * Only a table that did not come from the linker
 			 * set is freed before switching to the merged one.
 			 */
 			if (sysinit != SET_BEGIN(sysinit_set))
 				free(sysinit, M_TEMP);
 			sysinit = newsysinit;
 			sysinit_end = newsysinit_end;
 			newsysinit = NULL;
 			newsysinit_end = NULL;
 			goto restart;
 		}
 	}
 
 	/* Giant must be held exactly once here; release it for good. */
 	mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
 	mtx_unlock(&Giant);
 
 	/*
 	 * Now hand over this thread to swapper.
 	 */
 	swapper();
 	/* NOTREACHED*/
 }
 
 
 /*
  ***************************************************************************
  ****
  **** The following SYSINIT's belong elsewhere, but have not yet
  **** been moved.
  ****
  ***************************************************************************
  */
 /*
  * SYSINIT callback: print the NUL-terminated string that `data' points
  * to, exactly as given (no newline is appended).
  */
 static void
 print_caddr_t(void *data)
 {
 	char *text;
 
 	text = (char *)data;
 	printf("%s", text);
 }
 
 /*
  * SYSINIT callback: print the kernel version string (with any trailing
  * newlines removed) followed by the machine name, then the compiler
  * version on a line of its own.
  */
 static void
 print_version(void *data __unused)
 {
 	int visible;
 
 	/* Back up over every trailing '\n' in version. */
 	for (visible = strlen(version); visible > 0; visible--) {
 		if (version[visible - 1] != '\n')
 			break;
 	}
 	printf("%.*s %s\n", visible, version, machine);
 	printf("%s\n", compiler_version);
 }
 
 /*
  * Boot banner: copyright, trademark and version are printed during
  * SI_SUB_COPYRIGHT, in that order.
  */
 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t,
     copyright);
 SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t,
     trademark);
 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL);
 
 /*
  * The WITNESS warning is registered twice: once right after the banner
  * (SI_SUB_COPYRIGHT) and once more at SI_SUB_LAST.
  */
 #ifdef WITNESS
 static char wit_warn[] =
      "WARNING: WITNESS option enabled, expect reduced performance.\n";
 SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
    print_caddr_t, wit_warn);
 SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1,
    print_caddr_t, wit_warn);
 #endif
 
 /* Same scheme for the DIAGNOSTIC warning, ordered after WITNESS. */
 #ifdef DIAGNOSTIC
 static char diag_warn[] =
      "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
 SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2,
     print_caddr_t, diag_warn);
 SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2,
     print_caddr_t, diag_warn);
 #endif
 
 /*
  * sv_fetch_syscall_args hook of null_sysvec.  It panics unconditionally
  * and never produces a return value.
  */
 static int
 null_fetch_syscall_args(struct thread *td __unused,
     struct syscall_args *sa __unused)
 {
 
 	panic("null_fetch_syscall_args");
 }
 
 /*
  * sv_set_syscall_retval hook of null_sysvec.  It panics unconditionally.
  */
 static void
 null_set_syscall_retval(struct thread *td __unused, int error __unused)
 {
 
 	panic("null_set_syscall_retval");
 }
 
 /*
  * Placeholder system call vector; proc0_init() installs it as proc0's
  * p_sysent.  It has no syscall table or signal machinery: only the
  * address-space bounds, page size, stack location/protection and name
  * carry real values, and the two syscall hooks panic if ever invoked.
  */
 struct sysentvec null_sysvec = {
 	.sv_size	= 0,
 	.sv_table	= NULL,
 	.sv_mask	= 0,
 	.sv_sigsize	= 0,
 	.sv_sigtbl	= NULL,
 	.sv_errsize	= 0,
 	.sv_errtbl	= NULL,
 	.sv_transtrap	= NULL,
 	.sv_fixup	= NULL,
 	.sv_sendsig	= NULL,
 	.sv_sigcode	= NULL,
 	.sv_szsigcode	= NULL,
 	.sv_prepsyscall	= NULL,
 	.sv_name	= "null",
 	.sv_coredump	= NULL,
 	.sv_imgact_try	= NULL,
 	.sv_minsigstksz	= 0,
 	.sv_pagesize	= PAGE_SIZE,
 	.sv_minuser	= VM_MIN_ADDRESS,	/* used for vmspace0's map bounds */
 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
 	.sv_usrstack	= USRSTACK,
 	.sv_psstrings	= PS_STRINGS,
 	.sv_stackprot	= VM_PROT_ALL,
 	.sv_copyout_strings	= NULL,
 	.sv_setregs	= NULL,
 	.sv_fixlimit	= NULL,
 	.sv_maxssiz	= NULL,
 	.sv_flags	= 0,
 	.sv_set_syscall_retval = null_set_syscall_retval,
 	.sv_fetch_syscall_args = null_fetch_syscall_args,
 	.sv_syscallnames = NULL,
 	.sv_schedtail	= NULL,
 	.sv_thread_detach = NULL,
 };
 
 /*
  ***************************************************************************
  ****
  **** The two following SYSINIT's are proc0 specific glue code.  I am not
  **** convinced that they can not be safely combined, but their order of
  **** operation has been maintained as the same as the original init_main.c
  **** for right now.
  ****
  **** These probably belong in init_proc.c or kern_proc.c, since they
  **** deal with proc0 (the fork template process).
  ****
  ***************************************************************************
  */
 /*
  * Hand-build process 0 (proc0) and its single thread (thread0).
  *
  * Registered as the first SI_SUB_INTRINSIC sysinit and asserts that Giant
  * is held.  It links the statically allocated proc0, thread0, pgrp0,
  * session0 and vmspace0 into the global lists and hashes, then gives
  * proc0 credentials, signal state, a file descriptor table, resource
  * limits, accounting state and an address space, and finally invokes the
  * process/thread init and ctor event handlers.
  */
 /* ARGSUSED*/
 static void
 proc0_init(void *dummy __unused)
 {
 	struct proc *p;
 	struct thread *td;
 	struct ucred *newcred;
 	vm_paddr_t pageablemem;
 	int i;
 
 	GIANT_REQUIRED;
 	p = &proc0;
 	td = &thread0;
 	
 	/*
 	 * Initialize magic number and osrel.
 	 */
 	p->p_magic = P_MAGIC;
 	p->p_osrel = osreldate;
 
 	/*
 	 * Initialize thread and process structures.
 	 */
 	procinit();	/* set up proc zone */
 	threadinit();	/* set up UMA zones */
 
 	/*
 	 * Initialise scheduler resources.
 	 * Add scheduler specific parts to proc, thread as needed.
 	 */
 	schedinit();	/* scheduler gets its house in order */
 
 	/*
 	 * Create process 0 (the swapper).
 	 */
 	LIST_INSERT_HEAD(&allproc, p, p_list);
 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
 	p->p_pgrp = &pgrp0;
 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
 	LIST_INIT(&pgrp0.pg_members);
 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
 
 	/* proc0 leads its own session, which starts with one reference. */
 	pgrp0.pg_session = &session0;
 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
 	refcount_init(&session0.s_count, 1);
 	session0.s_leader = p;
 
 	p->p_sysent = &null_sysvec;	/* placeholder; its syscall hooks panic */
 	p->p_flag = P_SYSTEM | P_INMEM;
 	p->p_flag2 = 0;
 	p->p_state = PRS_NORMAL;
 	knlist_init_mtx(&p->p_klist, &p->p_mtx);
 	STAILQ_INIT(&p->p_ktr);
 	p->p_nice = NZERO;
 	/* pid_max cannot be greater than PID_MAX */
 	td->td_tid = PID_MAX + 1;
 	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
 	td->td_state = TDS_RUNNING;
 	td->td_pri_class = PRI_TIMESHARE;
 	td->td_user_pri = PUSER;
 	td->td_base_user_pri = PUSER;
 	td->td_lend_user_pri = PRI_MAX;
 	td->td_priority = PVM;
 	td->td_base_pri = PVM;
 	td->td_oncpu = 0;
 	td->td_flags = TDF_INMEM;
 	td->td_pflags = TDP_KTHREAD;
 	td->td_cpuset = cpuset_thread0();
 	/* Both the thread and the process start with VM_POLICY_NONE. */
 	vm_domain_policy_init(&td->td_vm_dom_policy);
 	vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1);
 	vm_domain_policy_init(&p->p_vm_dom_policy);
 	vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1);
 	prison0_init();
 	p->p_peers = 0;
 	p->p_leader = p;
 	p->p_reaper = p;	/* proc0 is its own reaper */
 	LIST_INIT(&p->p_reaplist);
 
 	strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
 	strncpy(td->td_name, "swapper", sizeof (td->td_name));
 
 	callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
 	callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
 	callout_init(&td->td_slpcallout, 1);
 
 	/* Create credentials. */
 	newcred = crget();
 	newcred->cr_ngroups = 1;	/* group 0 */
 	newcred->cr_uidinfo = uifind(0);
 	newcred->cr_ruidinfo = uifind(0);
 	newcred->cr_prison = &prison0;
 	newcred->cr_loginclass = loginclass_find("default");
 	proc_set_cred_init(p, newcred);
 #ifdef AUDIT
 	audit_cred_kproc0(newcred);
 #endif
 #ifdef MAC
 	mac_cred_create_swapper(newcred);
 #endif
 	/* Create sigacts. */
 	p->p_sigacts = sigacts_alloc();
 
 	/* Initialize signal state for process 0. */
 	siginit(&proc0);
 
 	/* Create the file descriptor table. */
 	p->p_fd = fdinit(NULL, false);
 	p->p_fdtol = NULL;
 
 	/*
 	 * Create the limits structures.  Every limit starts out unlimited;
 	 * selected ones are then tightened from tunables and free memory.
 	 */
 	p->p_limit = lim_alloc();
 	for (i = 0; i < RLIM_NLIMITS; i++)
 		p->p_limit->pl_rlimit[i].rlim_cur =
 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
 	p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
 	p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
 	p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
 	p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
 	/* Cast to avoid overflow on i386/PAE. */
 	pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count);
 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
 	    p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
 	/* The soft memlock limit is a third of the currently free memory. */
 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
 	p->p_cpulimit = RLIM_INFINITY;
 
 	PROC_LOCK(p);
 	thread_cow_get_proc(td, p);
 	PROC_UNLOCK(p);
 
 	/* Initialize resource accounting structures. */
 	racct_create(&p->p_racct);
 
 	p->p_stats = pstats_alloc();
 
 	/* Allocate a prototype map so we have something to fork. */
 	p->p_vmspace = &vmspace0;
 	vmspace0.vm_refcnt = 1;
 	pmap_pinit0(vmspace_pmap(&vmspace0));
 
 	/*
 	 * proc0 is not expected to enter usermode, so there is no special
 	 * handling for sv_minuser here, like is done for exec_new_vmspace().
 	 */
 	vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0),
 	    p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser);
 
 	/*
 	 * Call the init and ctor for the new thread and proc.  We wait
 	 * to do this until all other structures are fairly sane.
 	 */
 	EVENTHANDLER_INVOKE(process_init, p);
 	EVENTHANDLER_INVOKE(thread_init, td);
 	EVENTHANDLER_INVOKE(process_ctor, p);
 	EVENTHANDLER_INVOKE(thread_ctor, td);
 
 	/*
 	 * Charge root for one process.
 	 */
 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
 	PROC_LOCK(p);
 	racct_add_force(p, RACCT_NPROC, 1);
 	PROC_UNLOCK(p);
 }
 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
 
 /*
  * Registered as the first SI_SUB_INTRINSIC_POST sysinit.  Resets the
  * start time and accumulated run-time statistics of every process on the
  * allproc list, restarts the per-CPU switch timestamps, and reseeds
  * random(9) from the current time.
  */
 /* ARGSUSED*/
 static void
 proc0_post(void *dummy __unused)
 {
 	struct timespec ts;
 	struct proc *p;
 	struct rusage ru;
 	struct thread *td;
 
 	/*
 	 * Now we can look at the time, having had a chance to verify the
 	 * time from the filesystem.  Pretend that proc0 started now.
 	 */
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		microuptime(&p->p_stats->p_start);
 		PROC_STATLOCK(p);
 		/* The fetched values in ru are discarded. */
 		rufetch(p, &ru);	/* Clears thread stats */
 		PROC_STATUNLOCK(p);
 		p->p_rux.rux_runtime = 0;
 		p->p_rux.rux_uticks = 0;
 		p->p_rux.rux_sticks = 0;
 		p->p_rux.rux_iticks = 0;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			td->td_runtime = 0;
 		}
 	}
 	sx_sunlock(&allproc_lock);
 	/* Restart the per-CPU switch timestamps from the current values. */
 	PCPU_SET(switchtime, cpu_ticks());
 	PCPU_SET(switchticks, ticks);
 
 	/*
 	 * Give the ``random'' number generator a thump.
 	 */
 	nanotime(&ts);
 	srandom(ts.tv_sec ^ ts.tv_nsec);
 }
 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL);
 
 /*
  * Registered as the first SI_SUB_RANDOM sysinit: seed random(9) with the
  * current value of get_cyclecount().
  */
 static void
 random_init(void *dummy __unused)
 {
 
 	/*
 	 * After CPU has been started we have some randomness on most
 	 * platforms via get_cyclecount().  For platforms that don't
 	 * we will reseed random(9) in proc0_post() as well.
 	 */
 	srandom(get_cyclecount());
 }
 SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL);
 
 /*
  ***************************************************************************
  ****
  **** The following SYSINIT's and glue code should be moved to the
  **** respective files on a per subsystem basis.
  ****
  ***************************************************************************
  */
 
 
 /*
  ***************************************************************************
  ****
  **** The following code probably belongs in another file, like
  **** kern/init_init.c.
  ****
  ***************************************************************************
  */
 
 /*
  * List of paths to try when searching for "init".
  * Entries are separated by ':' and tried in order by start_init(), which
  * also overwrites this buffer from the "init_path" kernel environment
  * variable when that variable is set.  The compile-time default can be
  * replaced by defining INIT_PATH.
  */
 static char init_path[MAXPATHLEN] =
 #ifdef	INIT_PATH
     __XSTRING(INIT_PATH);
 #else
     "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init";
 #endif
 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
 	"Path used to search the init process");
 
 /*
  * Shutdown timeout of init(8).
  * Unused within kernel, but used to control init(8), hence do not remove.
  * The default of 120 can be overridden by defining INIT_SHUTDOWN_TIMEOUT.
  */
 #ifndef INIT_SHUTDOWN_TIMEOUT
 #define INIT_SHUTDOWN_TIMEOUT 120
 #endif
 static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
 SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
 	CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). "
 	"Unused within kernel, but used to control init(8)");
 
 /*
  * Start the initial user process; try exec'ing each pathname in init_path.
  * The program is invoked with one argument containing the boot flags.
  *
  * Installed by create_init() as the fork handler of init's first thread.
  * Takes Giant, mounts the root filesystem, maps one page below the top of
  * the user stack for the arguments and then walks init_path.  Returns
  * (with Giant released) after the first successful exec; panics if no
  * candidate could be executed.
  */
 static void
 start_init(void *dummy)
 {
 	vm_offset_t addr;
 	struct execve_args args;
 	int options, error;
 	char *var, *path, *next, *s;
 	char *ucp, **uap, *arg0, *arg1;
 	struct thread *td;
 	struct proc *p;
 
 	mtx_lock(&Giant);
 
 	GIANT_REQUIRED;
 
 	td = curthread;
 	p = td->td_proc;
 
 	vfs_mountroot();
 
 	/* Wipe GELI passphrase from the environment. */
 	kern_unsetenv("kern.geom.eli.passphrase");
 
 	/*
 	 * Need just enough stack to hold the faked-up "execve()" arguments.
 	 */
 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 0,
 	    VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
 		panic("init: couldn't allocate argument space");
 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
 	p->p_vmspace->vm_ssize = 1;
 
 	/* A kernel environment "init_path" replaces the built-in list. */
 	if ((var = kern_getenv("init_path")) != NULL) {
 		strlcpy(init_path, var, sizeof(init_path));
 		freeenv(var);
 	}
 	
 	/*
 	 * Each iteration isolates one ':'-separated candidate in
 	 * [path, next) and rebuilds the argument block from the top of the
 	 * user stack downwards: flag string, path string, argv array.
 	 */
 	for (path = init_path; *path != '\0'; path = next) {
 		while (*path == ':')
 			path++;
 		if (*path == '\0')
 			break;
 		for (next = path; *next != '\0' && *next != ':'; next++)
 			/* nothing */ ;
 		if (bootverbose)
 			printf("start_init: trying %.*s\n", (int)(next - path),
 			    path);
 			
 		/*
 		 * Move out the boot flag argument.
 		 */
 		options = 0;
 		ucp = (char *)p->p_sysent->sv_usrstack;
 		(void)subyte(--ucp, 0);		/* trailing zero */
 		if (boothowto & RB_SINGLE) {
 			(void)subyte(--ucp, 's');
 			options = 1;
 		}
 #ifdef notyet
                 if (boothowto & RB_FASTBOOT) {
 			(void)subyte(--ucp, 'f');
 			options = 1;
 		}
 #endif
 
 #ifdef BOOTCDROM
 		(void)subyte(--ucp, 'C');
 		options = 1;
 #endif
 
 		/* With no flag letters the argument becomes "--". */
 		if (options == 0)
 			(void)subyte(--ucp, '-');
 		(void)subyte(--ucp, '-');		/* leading hyphen */
 		arg1 = ucp;
 
 		/*
 		 * Move out the file name (also arg 0).
 		 */
 		(void)subyte(--ucp, 0);
 		/* Copied back to front because the block grows downwards. */
 		for (s = next - 1; s >= path; s--)
 			(void)subyte(--ucp, *s);
 		arg0 = ucp;
 
 		/*
 		 * Move out the arg pointers.
 		 */
 		/* Round down to pointer alignment before storing words. */
 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
 
 		/*
 		 * Point at the arguments.
 		 */
 		args.fname = arg0;
 		args.argv = uap;
 		args.envv = NULL;
 
 		/*
 		 * Now try to exec the program.  If can't for any reason
 		 * other than it doesn't exist, complain.
 		 *
 		 * Otherwise, return via fork_trampoline() all the way
 		 * to user mode as init!
 		 */
 		if ((error = sys_execve(td, &args)) == 0) {
 			mtx_unlock(&Giant);
 			return;
 		}
 		if (error != ENOENT)
 			printf("exec %.*s: error %d\n", (int)(next - path), 
 			    path, error);
 	}
 	/* Every candidate failed: there is nothing left to run. */
 	printf("init: not found in path %s\n", init_path);
 	panic("no init");
 }
 
 /*
  * Like kproc_create(), but runs in its own address space.
  * We do this early to reserve pid 1.
  *
  * Note special case - do not make it runnable yet.  Other work
  * in progress will change this more.
  *
  * The process is forked from thread0 with RFSTOPPED, receives its own
  * copy of the credentials, is marked as a reaper with proc0 on its reap
  * list, and has start_init() installed as the entry point of its first
  * thread.  kick_init() makes that thread runnable later.
  */
 static void
 create_init(const void *udata __unused)
 {
 	struct ucred *newcred, *oldcred;
+	struct thread *td;
 	int error;
 
 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc,
 	    NULL, 0);
 	if (error)
 		panic("cannot fork init: %d\n", error);
 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
 	/* divorce init's credentials from the kernel's */
 	newcred = crget();
 	sx_xlock(&proctree_lock);
 	PROC_LOCK(initproc);
 	initproc->p_flag |= P_SYSTEM | P_INMEM;
 	initproc->p_treeflag |= P_TREE_REAPER;
 	LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling);
 	oldcred = initproc->p_ucred;
 	crcopy(newcred, oldcred);
 #ifdef MAC
 	mac_cred_create_init(newcred);
 #endif
 #ifdef AUDIT
 	audit_cred_proc1(newcred);
 #endif
 	proc_set_cred(initproc, newcred);
-	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
+	td = FIRST_THREAD_IN_PROC(initproc);
+	crfree(td->td_ucred);
+	td->td_ucred = crhold(initproc->p_ucred);
 	PROC_UNLOCK(initproc);
 	sx_xunlock(&proctree_lock);
 	/* Drop the reference to the credentials the process had before. */
 	crfree(oldcred);
 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
 }
 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
 
 /*
  * Make it runnable now.
  *
  * Registered at SI_SUB_KTHREAD_INIT: marks the first thread of initproc
  * as able to run and hands it to the scheduler, all under the thread lock.
  */
 static void
 kick_init(const void *udata __unused)
 {
 	struct thread *td;
 
 	td = FIRST_THREAD_IN_PROC(initproc);
 	thread_lock(td);
 	TD_SET_CAN_RUN(td);
 	sched_add(td, SRQ_BORING);
 	thread_unlock(td);
 }
 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
Index: head/sys/kern/kern_prot.c
===================================================================
--- head/sys/kern/kern_prot.c	(revision 285632)
+++ head/sys/kern/kern_prot.c	(revision 285633)
@@ -1,2264 +1,2246 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1990, 1991, 1993
  *	The Regents of the University of California.
  * (c) UNIX System Laboratories, Inc.
  * Copyright (c) 2000-2001 Robert N. M. Watson.
  * All rights reserved.
  *
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_prot.c	8.6 (Berkeley) 1/21/94
  */
 
 /*
  * System calls related to processes and protection
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/acct.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/loginclass.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/refcount.h>
 #include <sys/sx.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sysproto.h>
 #include <sys/jail.h>
 #include <sys/pioctl.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 
 #ifdef REGRESSION
 FEATURE(regression,
     "Kernel support for interfaces necessary for regression testing (SECURITY RISK!)");
 #endif
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #endif
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 /* malloc(9) type "cred", described as "credentials". */
 static MALLOC_DEFINE(M_CRED, "cred", "credentials");
 
 /* Root of the security.bsd sysctl subtree. */
 SYSCTL_NODE(_security, OID_AUTO, bsd, CTLFLAG_RW, 0, "BSD security policy");
 
 /* File-local helpers; their definitions are elsewhere in this file. */
 static void crextend(struct ucred *cr, int n);
 static void crsetgroups_locked(struct ucred *cr, int ngrp,
     gid_t *groups);
 
 #ifndef _SYS_SYSPROTO_H_
 struct getpid_args {
 	int	dummy;
 };
 #endif
 /*
  * Return the pid of the calling thread's process in the first return
  * slot.  With COMPAT_43 the parent pid, as computed by kern_getppid(), is
  * also stored in the second return slot.  Always succeeds.
  */
 /* ARGSUSED */
 int
 sys_getpid(struct thread *td, struct getpid_args *uap)
 {
 
 	td->td_retval[0] = td->td_proc->p_pid;
 #if defined(COMPAT_43)
 	td->td_retval[1] = kern_getppid(td);
 #endif
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getppid_args {
 	int	dummy;
 };
 #endif
 /*
  * Return the parent pid, as computed by kern_getppid(), in the first
  * return slot.  Always succeeds.
  */
 /* ARGSUSED */
 int
 sys_getppid(struct thread *td, struct getppid_args *uap)
 {
 	int parent;
 
 	parent = kern_getppid(td);
 	td->td_retval[0] = parent;
 	return (0);
 }
 
 int
 kern_getppid(struct thread *td)
 {
 	struct proc *p = td->td_proc;
 	struct proc *pp;
 	int ppid;
 
 	PROC_LOCK(p);
 	if (!(p->p_flag & P_TRACED)) {
 		ppid = p->p_pptr->p_pid;
 		PROC_UNLOCK(p);
 	} else {
 		PROC_UNLOCK(p);
 		sx_slock(&proctree_lock);
 		pp = proc_realparent(p);
 		ppid = pp->p_pid;
 		sx_sunlock(&proctree_lock);
 	}
 
 	return (ppid);
 }
 
 /*
  * Get process group ID; note that POSIX getpgrp takes no parameter.
  * The id of the caller's process group is read under the process lock
  * and stored in the first return slot.  Always succeeds.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getpgrp_args {
 	int	dummy;
 };
 #endif
 int
 sys_getpgrp(struct thread *td, struct getpgrp_args *uap)
 {
 	struct proc *self;
 
 	self = td->td_proc;
 	PROC_LOCK(self);
 	td->td_retval[0] = self->p_pgrp->pg_id;
 	PROC_UNLOCK(self);
 	return (0);
 }
 
 /* Get an arbitary pid's process group id */
 #ifndef _SYS_SYSPROTO_H_
 struct getpgid_args {
 	pid_t	pid;
 };
 #endif
 int
 sys_getpgid(struct thread *td, struct getpgid_args *uap)
 {
 	struct proc *p;
 	int error;
 
 	if (uap->pid == 0) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 	} else {
 		p = pfind(uap->pid);
 		if (p == NULL)
 			return (ESRCH);
 		error = p_cansee(td, p);
 		if (error) {
 			PROC_UNLOCK(p);
 			return (error);
 		}
 	}
 	td->td_retval[0] = p->p_pgrp->pg_id;
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 /*
  * Get an arbitary pid's session id.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getsid_args {
 	pid_t	pid;
 };
 #endif
 int
 sys_getsid(struct thread *td, struct getsid_args *uap)
 {
 	struct proc *p;
 	int error;
 
 	if (uap->pid == 0) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 	} else {
 		p = pfind(uap->pid);
 		if (p == NULL)
 			return (ESRCH);
 		error = p_cansee(td, p);
 		if (error) {
 			PROC_UNLOCK(p);
 			return (error);
 		}
 	}
 	td->td_retval[0] = p->p_session->s_sid;
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getuid_args {
         int     dummy;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getuid(struct thread *td, struct getuid_args *uap)
 {
 
 	td->td_retval[0] = td->td_ucred->cr_ruid;
 #if defined(COMPAT_43)
 	td->td_retval[1] = td->td_ucred->cr_uid;
 #endif
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct geteuid_args {
         int     dummy;
 };
 #endif
 /* ARGSUSED */
 int
 sys_geteuid(struct thread *td, struct geteuid_args *uap)
 {
 
 	td->td_retval[0] = td->td_ucred->cr_uid;
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getgid_args {
         int     dummy;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getgid(struct thread *td, struct getgid_args *uap)
 {
 
 	td->td_retval[0] = td->td_ucred->cr_rgid;
 #if defined(COMPAT_43)
 	td->td_retval[1] = td->td_ucred->cr_groups[0];
 #endif
 	return (0);
 }
 
 /*
  * Get effective group ID.  The "egid" is groups[0], and could be obtained
  * via getgroups.  This syscall exists because it is somewhat painful to do
  * correctly in a library function.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getegid_args {
         int     dummy;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getegid(struct thread *td, struct getegid_args *uap)
 {
 
 	td->td_retval[0] = td->td_ucred->cr_groups[0];
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getgroups_args {
 	u_int	gidsetsize;
 	gid_t	*gidset;
 };
 #endif
 int
 sys_getgroups(struct thread *td, register struct getgroups_args *uap)
 {
 	struct ucred *cred;
 	u_int ngrp;
 	int error;
 
 	cred = td->td_ucred;
 	ngrp = cred->cr_ngroups;
 
 	if (uap->gidsetsize == 0) {
 		error = 0;
 		goto out;
 	}
 	if (uap->gidsetsize < ngrp)
 		return (EINVAL);
 
 	error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t));
 out:
 	td->td_retval[0] = ngrp;
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setsid_args {
 	int	dummy;
 };
 #endif
 /*
  * Make the calling process the leader of a new process group and session
  * whose id is its own pid.  Fails with EPERM if the caller already leads
  * its process group or if a process group with that id already exists.
  * On success the new id is returned in td_retval[0].
  */
 /* ARGSUSED */
 int
 sys_setsid(register struct thread *td, struct setsid_args *uap)
 {
 	struct proc *p;
 	struct pgrp *existing, *newpgrp;
 	struct session *newsess;
 	int already_leader, error;
 
 	p = td->td_proc;
 	existing = NULL;
 
 	/* Both objects are allocated up front, before proctree_lock. */
 	newpgrp = malloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
 	newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO);
 
 	sx_xlock(&proctree_lock);
 
 	already_leader = (p->p_pgid == p->p_pid);
 	if (!already_leader)
 		existing = pgfind(p->p_pid);
 
 	if (already_leader || existing != NULL) {
 		if (existing != NULL)
 			PGRP_UNLOCK(existing);
 		error = EPERM;
 	} else {
 		(void)enterpgrp(p, p->p_pid, newpgrp, newsess);
 		td->td_retval[0] = p->p_pid;
 		/* Handed over to enterpgrp(); must not be freed below. */
 		newpgrp = NULL;
 		newsess = NULL;
 		error = 0;
 	}
 
 	sx_xunlock(&proctree_lock);
 
 	/* Release whatever was preallocated but not consumed. */
 	if (newpgrp != NULL)
 		free(newpgrp, M_PGRP);
 	if (newsess != NULL)
 		free(newsess, M_SESSION);
 
 	return (error);
 }
 
 /*
  * set process group (setpgid/old setpgrp)
  *
  * caller does setpgid(targpid, targpgid)
  *
  * pid must be caller or child of caller (ESRCH)
  * if a child
  *	pid must be in same session (EPERM)
  *	pid can't have done an exec (EACCES)
  * if pgid != pid
  * 	there must exist some pid in same session having pgid (EPERM)
  * pid must not be session leader (EPERM)
  *
  * A negative pgid is rejected with EINVAL before anything else is done.
  * A pid of 0 names the caller; a pgid of 0 is replaced by the target's
  * pid.  Returns 0 on success or one of the errors listed above.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct setpgid_args {
 	int	pid;		/* target process id */
 	int	pgid;		/* target pgrp id */
 };
 #endif
 /* ARGSUSED */
 int
 sys_setpgid(struct thread *td, register struct setpgid_args *uap)
 {
 	struct proc *curp = td->td_proc;
 	register struct proc *targp;	/* target process */
 	register struct pgrp *pgrp;	/* target pgrp */
 	int error;
 	struct pgrp *newpgrp;
 
 	if (uap->pgid < 0)
 		return (EINVAL);
 
 	error = 0;
 
 	/*
 	 * Preallocate a pgrp before taking proctree_lock in case a new
 	 * group has to be created; it is freed at 'done' if unused.
 	 */
 	newpgrp = malloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
 
 	sx_xlock(&proctree_lock);
 	if (uap->pid != 0 && uap->pid != curp->p_pid) {
 		/*
 		 * The target is some other process.  Every failure path
 		 * below unlocks targp, so pfind() presumably returns it
 		 * locked -- confirm against pfind().
 		 */
 		if ((targp = pfind(uap->pid)) == NULL) {
 			error = ESRCH;
 			goto done;
 		}
 		if (!inferior(targp)) {
 			PROC_UNLOCK(targp);
 			error = ESRCH;
 			goto done;
 		}
 		if ((error = p_cansee(td, targp))) {
 			PROC_UNLOCK(targp);
 			goto done;
 		}
 		if (targp->p_pgrp == NULL ||
 		    targp->p_session != curp->p_session) {
 			PROC_UNLOCK(targp);
 			error = EPERM;
 			goto done;
 		}
 		if (targp->p_flag & P_EXEC) {
 			PROC_UNLOCK(targp);
 			error = EACCES;
 			goto done;
 		}
 		/* All per-target checks passed; drop the process lock. */
 		PROC_UNLOCK(targp);
 	} else
 		targp = curp;
 	if (SESS_LEADER(targp)) {
 		error = EPERM;
 		goto done;
 	}
 	/* pgid 0 means "use the target's own pid as the group id". */
 	if (uap->pgid == 0)
 		uap->pgid = targp->p_pid;
 	if ((pgrp = pgfind(uap->pgid)) == NULL) {
 		/*
 		 * No such group yet: it may only be created when its id
 		 * is the target's own pid.
 		 */
 		if (uap->pgid == targp->p_pid) {
 			error = enterpgrp(targp, uap->pgid, newpgrp,
 			    NULL);
 			/* On success newpgrp has been consumed. */
 			if (error == 0)
 				newpgrp = NULL;
 		} else
 			error = EPERM;
 	} else {
 		/* Already a member of the requested group: nothing to do. */
 		if (pgrp == targp->p_pgrp) {
 			PGRP_UNLOCK(pgrp);
 			goto done;
 		}
 		if (pgrp->pg_id != targp->p_pid &&
 		    pgrp->pg_session != curp->p_session) {
 			PGRP_UNLOCK(pgrp);
 			error = EPERM;
 			goto done;
 		}
 		PGRP_UNLOCK(pgrp);
 		error = enterthispgrp(targp, pgrp);
 	}
 done:
 	sx_xunlock(&proctree_lock);
 	/* newpgrp is only cleared on the successful enterpgrp() path. */
 	KASSERT((error == 0) || (newpgrp != NULL),
 	    ("setpgid failed and newpgrp is NULL"));
 	if (newpgrp != NULL)
 		free(newpgrp, M_PGRP);
 	return (error);
 }
 
 /*
  * Use the clause in B.4.2.2 that allows setuid/setgid to be 4.2/4.3BSD
  * compatible.  It says that setting the uid/gid to euid/egid is a special
  * case of "appropriate privilege".  Once the rules are expanded out, this
  * basically means that setuid(nnn) sets all three id's, in all permitted
  * cases unless _POSIX_SAVED_IDS is enabled.  In that case, setuid(getuid())
  * does not set the saved id - this is dangerous for traditional BSD
  * programs.  For this reason, we *really* do not want to set
  * _POSIX_SAVED_IDS and do not want to clear POSIX_APPENDIX_B_4_2_2.
  *
  * The macro is tested by #ifdef blocks inside sys_setuid() and
  * sys_setgid() below.
  */
 #define POSIX_APPENDIX_B_4_2_2
 
 #ifndef _SYS_SYSPROTO_H_
 struct setuid_args {
 	uid_t	uid;
 };
 #endif
 /*
  * Set the uid(s) of the calling process to uap->uid.
  *
  * A fresh credential and the uidinfo for the requested uid are obtained
  * before the process lock is taken.  On success the new credential is
  * installed with proc_set_cred() and the old one released; on failure the
  * unused new credential is released instead.  Returns 0, or the error
  * from the MAC or privilege check.
  */
 /* ARGSUSED */
 int
 sys_setuid(struct thread *td, struct setuid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	uid_t uid;
 	struct uidinfo *uip;
 	int error;
 
 	uid = uap->uid;
 	AUDIT_ARG_UID(uid);
 	newcred = crget();
 	uip = uifind(uid);
 	PROC_LOCK(p);
 	/*
 	 * Copy credentials so other references do not see our changes.
 	 */
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setuid(oldcred, uid);
 	if (error)
 		goto fail;
 #endif
 
 	/*
 	 * See if we have "permission" by POSIX 1003.1 rules.
 	 *
 	 * Note that setuid(geteuid()) is a special case of
 	 * "appropriate privileges" in appendix B.4.2.2.  We need
 	 * to use this clause to be compatible with traditional BSD
 	 * semantics.  Basically, it means that "setuid(xx)" sets all
 	 * three id's (assuming you have privs).
 	 *
 	 * Notes on the logic.  We do things in three steps.
 	 * 1: We determine if the euid is going to change, and do EPERM
 	 *    right away.  We unconditionally change the euid later if this
 	 *    test is satisfied, simplifying that part of the logic.
 	 * 2: We determine if the real and/or saved uids are going to
 	 *    change.  Determined by compile options.
 	 * 3: Change euid last. (after tests in #2 for "appropriate privs")
 	 */
 	if (uid != oldcred->cr_ruid &&		/* allow setuid(getuid()) */
 #ifdef _POSIX_SAVED_IDS
 	    uid != oldcred->cr_svuid &&		/* allow setuid(saved uid) */
 #endif
 #ifdef POSIX_APPENDIX_B_4_2_2	/* Use BSD-compat clause from B.4.2.2 */
 	    uid != oldcred->cr_uid &&		/* allow setuid(geteuid()) */
 #endif
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETUID, 0)) != 0)
 		goto fail;
 
 #ifdef _POSIX_SAVED_IDS
 	/*
 	 * Do we have "appropriate privileges" (are we root or uid == euid)
 	 * If so, we are changing the real uid and/or saved uid.
 	 */
 	if (
 #ifdef POSIX_APPENDIX_B_4_2_2	/* Use the clause from B.4.2.2 */
 	    uid == oldcred->cr_uid ||
 #endif
 	    /* We are using privs. */
 	    priv_check_cred(oldcred, PRIV_CRED_SETUID, 0) == 0)
 #endif
 	/*
 	 * Without _POSIX_SAVED_IDS the 'if' above is compiled out and the
 	 * block below runs unconditionally.
 	 */
 	{
 		/*
 		 * Set the real uid and transfer proc count to new user.
 		 */
 		if (uid != oldcred->cr_ruid) {
 			change_ruid(newcred, uip);
 			setsugid(p);
 		}
 		/*
 		 * Set saved uid
 		 *
 		 * XXX always set saved uid even if not _POSIX_SAVED_IDS, as
 		 * the security of seteuid() depends on it.  B.4.2.2 says it
 		 * is important that we should do this.
 		 */
 		if (uid != oldcred->cr_svuid) {
 			change_svuid(newcred, uid);
 			setsugid(p);
 		}
 	}
 
 	/*
 	 * In all permitted cases, we are changing the euid.
 	 */
 	if (uid != oldcred->cr_uid) {
 		change_euid(newcred, uip);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
 #endif
 	/* Success: drop our uidinfo reference and the replaced credential. */
 	uifree(uip);
 	crfree(oldcred);
 	return (0);
 
 fail:
 	/* Failure: the process keeps oldcred; discard the unused copy. */
 	PROC_UNLOCK(p);
 	uifree(uip);
 	crfree(newcred);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct seteuid_args {
 	uid_t	euid;
 };
 #endif
 /* ARGSUSED */
 int
 sys_seteuid(struct thread *td, struct seteuid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	uid_t euid;
 	struct uidinfo *euip;
 	int error;
 
 	euid = uap->euid;
 	AUDIT_ARG_EUID(euid);
 	newcred = crget();
 	euip = uifind(euid);
 	PROC_LOCK(p);
 	/*
 	 * Copy credentials so other references do not see our changes.
 	 */
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_seteuid(oldcred, euid);
 	if (error)
 		goto fail;
 #endif
 
 	if (euid != oldcred->cr_ruid &&		/* allow seteuid(getuid()) */
 	    euid != oldcred->cr_svuid &&	/* allow seteuid(saved uid) */
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETEUID, 0)) != 0)
 		goto fail;
 
 	/*
 	 * Everything's okay, do it.
 	 */
 	if (oldcred->cr_uid != euid) {
 		change_euid(newcred, euip);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	uifree(euip);
 	crfree(oldcred);
 	return (0);
 
 fail:
 	PROC_UNLOCK(p);
 	uifree(euip);
 	crfree(newcred);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setgid_args {
 	gid_t	gid;
 };
 #endif
 /*
  * Set the gid(s) of the calling process to uap->gid.
  *
  * A fresh credential is obtained before the process lock is taken.  On
  * success the new credential is installed with proc_set_cred() and the
  * old one released; on failure the unused new credential is released
  * instead.  Returns 0, or the error from the MAC or privilege check.
  */
 /* ARGSUSED */
 int
 sys_setgid(struct thread *td, struct setgid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	gid_t gid;
 	int error;
 
 	gid = uap->gid;
 	AUDIT_ARG_GID(gid);
 	newcred = crget();
 	PROC_LOCK(p);
 	/*
 	 * Copy credentials so other references do not see our changes.
 	 */
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setgid(oldcred, gid);
 	if (error)
 		goto fail;
 #endif
 
 	/*
 	 * See if we have "permission" by POSIX 1003.1 rules.
 	 *
 	 * Note that setgid(getegid()) is a special case of
 	 * "appropriate privileges" in appendix B.4.2.2.  We need
 	 * to use this clause to be compatible with traditional BSD
 	 * semantics.  Basically, it means that "setgid(xx)" sets all
 	 * three id's (assuming you have privs).
 	 *
 	 * For notes on the logic here, see setuid() above.
 	 */
 	if (gid != oldcred->cr_rgid &&		/* allow setgid(getgid()) */
 #ifdef _POSIX_SAVED_IDS
 	    gid != oldcred->cr_svgid &&		/* allow setgid(saved gid) */
 #endif
 #ifdef POSIX_APPENDIX_B_4_2_2	/* Use BSD-compat clause from B.4.2.2 */
 	    gid != oldcred->cr_groups[0] && /* allow setgid(getegid()) */
 #endif
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETGID, 0)) != 0)
 		goto fail;
 
 #ifdef _POSIX_SAVED_IDS
 	/*
 	 * Do we have "appropriate privileges" (are we root or gid == egid)
 	 * If so, we are changing the real gid and saved gid.
 	 */
 	if (
 #ifdef POSIX_APPENDIX_B_4_2_2	/* use the clause from B.4.2.2 */
 	    gid == oldcred->cr_groups[0] ||
 #endif
 	    /* We are using privs. */
 	    priv_check_cred(oldcred, PRIV_CRED_SETGID, 0) == 0)
 #endif
 	/*
 	 * Without _POSIX_SAVED_IDS the 'if' above is compiled out and the
 	 * block below runs unconditionally.
 	 */
 	{
 		/*
 		 * Set real gid
 		 */
 		if (oldcred->cr_rgid != gid) {
 			change_rgid(newcred, gid);
 			setsugid(p);
 		}
 		/*
 		 * Set saved gid
 		 *
 		 * XXX always set saved gid even if not _POSIX_SAVED_IDS, as
 		 * the security of setegid() depends on it.  B.4.2.2 says it
 		 * is important that we should do this.
 		 */
 		if (oldcred->cr_svgid != gid) {
 			change_svgid(newcred, gid);
 			setsugid(p);
 		}
 	}
 	/*
 	 * In all permitted cases, we are changing the egid.
 	 */
 	if (oldcred->cr_groups[0] != gid) {
 		change_egid(newcred, gid);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	/* Success: release the replaced credential. */
 	crfree(oldcred);
 	return (0);
 
 fail:
 	/* Failure: the process keeps oldcred; discard the unused copy. */
 	PROC_UNLOCK(p);
 	crfree(newcred);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setegid_args {
 	gid_t	egid;
 };
 #endif
 /* ARGSUSED */
 int
 sys_setegid(struct thread *td, struct setegid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	gid_t egid;
 	int error;
 
 	egid = uap->egid;
 	AUDIT_ARG_EGID(egid);
 	newcred = crget();
 	PROC_LOCK(p);
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setegid(oldcred, egid);
 	if (error)
 		goto fail;
 #endif
 
 	if (egid != oldcred->cr_rgid &&		/* allow setegid(getgid()) */
 	    egid != oldcred->cr_svgid &&	/* allow setegid(saved gid) */
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETEGID, 0)) != 0)
 		goto fail;
 
 	if (oldcred->cr_groups[0] != egid) {
 		change_egid(newcred, egid);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
 
 fail:
 	PROC_UNLOCK(p);
 	crfree(newcred);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setgroups_args {
 	u_int	gidsetsize;
 	gid_t	*gidset;
 };
 #endif
 /* ARGSUSED */
 int
 sys_setgroups(struct thread *td, struct setgroups_args *uap)
 {
 	gid_t smallgroups[XU_NGROUPS];
 	gid_t *groups;
 	u_int gidsetsize;
 	int error;
 
 	gidsetsize = uap->gidsetsize;
 	if (gidsetsize > ngroups_max + 1)
 		return (EINVAL);
 
 	if (gidsetsize > XU_NGROUPS)
 		groups = malloc(gidsetsize * sizeof(gid_t), M_TEMP, M_WAITOK);
 	else
 		groups = smallgroups;
 
 	error = copyin(uap->gidset, groups, gidsetsize * sizeof(gid_t));
 	if (error == 0)
 		error = kern_setgroups(td, gidsetsize, groups);
 
 	if (gidsetsize > XU_NGROUPS)
 		free(groups, M_TEMP);
 	return (error);
 }
 
 /*
  * Install a new group list of ngrp entries for the calling process.
  * The caller must already have bounded ngrp by ngroups_max + 1 (asserted
  * below).  Requires PRIV_CRED_SETGROUPS.  Returns 0, or the error from
  * the MAC or privilege check.
  */
 int
 kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups)
 {
 	struct ucred *ncr, *ocr;
 	struct proc *proc;
 	int error;
 
 	proc = td->td_proc;
 	MPASS(ngrp <= ngroups_max + 1);
 	AUDIT_ARG_GROUPSET(groups, ngrp);
 	ncr = crget();
 	crextend(ncr, ngrp);
 	PROC_LOCK(proc);
 	ocr = crcopysafe(proc, ncr);
 
 #ifdef MAC
 	error = mac_cred_check_setgroups(ocr, ngrp, groups);
 	if (error != 0)
 		goto fail;
 #endif
 
 	error = priv_check_cred(ocr, PRIV_CRED_SETGROUPS, 0);
 	if (error != 0)
 		goto fail;
 
 	if (ngrp != 0) {
 		crsetgroups_locked(ncr, ngrp, groups);
 	} else {
 		/*
 		 * setgroups(0, NULL) is a legitimate way of clearing the
 		 * groups vector on non-BSD systems (which generally do not
 		 * have the egid in the groups[0]).  We risk security holes
 		 * when running non-BSD software if we do not do the same.
 		 */
 		ncr->cr_ngroups = 1;
 	}
 	setsugid(proc);
 	proc_set_cred(proc, ncr);
 	PROC_UNLOCK(proc);
 	crfree(ocr);
 	return (0);
 
 fail:
 	PROC_UNLOCK(proc);
 	crfree(ncr);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setreuid_args {
 	uid_t	ruid;
 	uid_t	euid;
 };
 #endif
 /*
  * Set the real and/or effective uid of the calling process.  Passing
  * (uid_t)-1 for either argument leaves that id unchanged.
  *
  * No privilege is needed when ruid is the current real or saved uid and
  * euid is the current effective, real or saved uid; anything else
  * requires PRIV_CRED_SETREUID.  Returns 0, or the error from the MAC or
  * privilege check.
  */
 /* ARGSUSED */
 int
 sys_setreuid(register struct thread *td, struct setreuid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	uid_t euid, ruid;
 	struct uidinfo *euip, *ruip;
 	int error;
 
 	euid = uap->euid;
 	ruid = uap->ruid;
 	AUDIT_ARG_EUID(euid);
 	AUDIT_ARG_RUID(ruid);
 	newcred = crget();
 	euip = uifind(euid);
 	ruip = uifind(ruid);
 	PROC_LOCK(p);
 	/* newcred becomes a private copy; oldcred is what p holds now. */
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setreuid(oldcred, ruid, euid);
 	if (error)
 		goto fail;
 #endif
 
 	/* The privilege check only runs if an id is not already held. */
 	if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid &&
 	      ruid != oldcred->cr_svuid) ||
 	     (euid != (uid_t)-1 && euid != oldcred->cr_uid &&
 	      euid != oldcred->cr_ruid && euid != oldcred->cr_svuid)) &&
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETREUID, 0)) != 0)
 		goto fail;
 
 	if (euid != (uid_t)-1 && oldcred->cr_uid != euid) {
 		change_euid(newcred, euip);
 		setsugid(p);
 	}
 	if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) {
 		change_ruid(newcred, ruip);
 		setsugid(p);
 	}
 	/*
 	 * Evaluated against newcred, i.e. after the changes above: if a
 	 * real uid was supplied, or the resulting effective and real uids
 	 * differ, the saved uid is made equal to the new effective uid.
 	 */
 	if ((ruid != (uid_t)-1 || newcred->cr_uid != newcred->cr_ruid) &&
 	    newcred->cr_svuid != newcred->cr_uid) {
 		change_svuid(newcred, newcred->cr_uid);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
 #endif
 	/* Success: drop both uidinfo references and the old credential. */
 	uifree(ruip);
 	uifree(euip);
 	crfree(oldcred);
 	return (0);
 
 fail:
 	/* Failure: the process keeps oldcred; discard the unused copy. */
 	PROC_UNLOCK(p);
 	uifree(ruip);
 	uifree(euip);
 	crfree(newcred);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setregid_args {
 	gid_t	rgid;
 	gid_t	egid;
 };
 #endif
 /*
  * Set the real and/or effective gid of the calling process.  Passing
  * (gid_t)-1 for either argument leaves that id unchanged.
  *
  * No privilege is needed when rgid is the current real or saved gid and
  * egid is the current effective, real or saved gid; anything else
  * requires PRIV_CRED_SETREGID.  Returns 0, or the error from the MAC or
  * privilege check.
  */
 /* ARGSUSED */
 int
 sys_setregid(register struct thread *td, struct setregid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	gid_t egid, rgid;
 	int error;
 
 	egid = uap->egid;
 	rgid = uap->rgid;
 	AUDIT_ARG_EGID(egid);
 	AUDIT_ARG_RGID(rgid);
 	newcred = crget();
 	PROC_LOCK(p);
 	/* newcred becomes a private copy; oldcred is what p holds now. */
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setregid(oldcred, rgid, egid);
 	if (error)
 		goto fail;
 #endif
 
 	/* The privilege check only runs if an id is not already held. */
 	if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid &&
 	    rgid != oldcred->cr_svgid) ||
 	     (egid != (gid_t)-1 && egid != oldcred->cr_groups[0] &&
 	     egid != oldcred->cr_rgid && egid != oldcred->cr_svgid)) &&
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETREGID, 0)) != 0)
 		goto fail;
 
 	if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) {
 		change_egid(newcred, egid);
 		setsugid(p);
 	}
 	if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) {
 		change_rgid(newcred, rgid);
 		setsugid(p);
 	}
 	/*
 	 * Evaluated against newcred, i.e. after the changes above: if a
 	 * real gid was supplied, or the resulting effective and real gids
 	 * differ, the saved gid is made equal to the new effective gid.
 	 */
 	if ((rgid != (gid_t)-1 || newcred->cr_groups[0] != newcred->cr_rgid) &&
 	    newcred->cr_svgid != newcred->cr_groups[0]) {
 		change_svgid(newcred, newcred->cr_groups[0]);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	/* Success: release the replaced credential. */
 	crfree(oldcred);
 	return (0);
 
 fail:
 	/* Failure: the process keeps oldcred; discard the unused copy. */
 	PROC_UNLOCK(p);
 	crfree(newcred);
 	return (error);
 }
 
 /*
  * setresuid(ruid, euid, suid) is like setreuid except control over the saved
  * uid is explicit.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct setresuid_args {
 	uid_t	ruid;
 	uid_t	euid;
 	uid_t	suid;
 };
 #endif
 /* ARGSUSED */
 int
 sys_setresuid(register struct thread *td, struct setresuid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	uid_t euid, ruid, suid;
 	struct uidinfo *euip, *ruip;
 	int error;
 
 	euid = uap->euid;
 	ruid = uap->ruid;
 	suid = uap->suid;
 	AUDIT_ARG_EUID(euid);
 	AUDIT_ARG_RUID(ruid);
 	AUDIT_ARG_SUID(suid);
 	newcred = crget();
 	euip = uifind(euid);
 	ruip = uifind(ruid);
 	PROC_LOCK(p);
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setresuid(oldcred, ruid, euid, suid);
 	if (error)
 		goto fail;
 #endif
 
 	if (((ruid != (uid_t)-1 && ruid != oldcred->cr_ruid &&
 	     ruid != oldcred->cr_svuid &&
 	      ruid != oldcred->cr_uid) ||
 	     (euid != (uid_t)-1 && euid != oldcred->cr_ruid &&
 	    euid != oldcred->cr_svuid &&
 	      euid != oldcred->cr_uid) ||
 	     (suid != (uid_t)-1 && suid != oldcred->cr_ruid &&
 	    suid != oldcred->cr_svuid &&
 	      suid != oldcred->cr_uid)) &&
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETRESUID, 0)) != 0)
 		goto fail;
 
 	if (euid != (uid_t)-1 && oldcred->cr_uid != euid) {
 		change_euid(newcred, euip);
 		setsugid(p);
 	}
 	if (ruid != (uid_t)-1 && oldcred->cr_ruid != ruid) {
 		change_ruid(newcred, ruip);
 		setsugid(p);
 	}
 	if (suid != (uid_t)-1 && oldcred->cr_svuid != suid) {
 		change_svuid(newcred, suid);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 #ifdef RACCT
 	racct_proc_ucred_changed(p, oldcred, newcred);
 #endif
 	uifree(ruip);
 	uifree(euip);
 	crfree(oldcred);
 	return (0);
 
 fail:
 	PROC_UNLOCK(p);
 	uifree(ruip);
 	uifree(euip);
 	crfree(newcred);
 	return (error);
 
 }
 
 /*
  * setresgid(rgid, egid, sgid) is like setregid except control over the saved
  * gid is explicit.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct setresgid_args {
 	gid_t	rgid;
 	gid_t	egid;
 	gid_t	sgid;
 };
 #endif
 /* ARGSUSED */
 int
 sys_setresgid(register struct thread *td, struct setresgid_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct ucred *newcred, *oldcred;
 	gid_t egid, rgid, sgid;
 	int error;
 
 	egid = uap->egid;
 	rgid = uap->rgid;
 	sgid = uap->sgid;
 	AUDIT_ARG_EGID(egid);
 	AUDIT_ARG_RGID(rgid);
 	AUDIT_ARG_SGID(sgid);
 	newcred = crget();
 	PROC_LOCK(p);
 	oldcred = crcopysafe(p, newcred);
 
 #ifdef MAC
 	error = mac_cred_check_setresgid(oldcred, rgid, egid, sgid);
 	if (error)
 		goto fail;
 #endif
 
 	if (((rgid != (gid_t)-1 && rgid != oldcred->cr_rgid &&
 	      rgid != oldcred->cr_svgid &&
 	      rgid != oldcred->cr_groups[0]) ||
 	     (egid != (gid_t)-1 && egid != oldcred->cr_rgid &&
 	      egid != oldcred->cr_svgid &&
 	      egid != oldcred->cr_groups[0]) ||
 	     (sgid != (gid_t)-1 && sgid != oldcred->cr_rgid &&
 	      sgid != oldcred->cr_svgid &&
 	      sgid != oldcred->cr_groups[0])) &&
 	    (error = priv_check_cred(oldcred, PRIV_CRED_SETRESGID, 0)) != 0)
 		goto fail;
 
 	if (egid != (gid_t)-1 && oldcred->cr_groups[0] != egid) {
 		change_egid(newcred, egid);
 		setsugid(p);
 	}
 	if (rgid != (gid_t)-1 && oldcred->cr_rgid != rgid) {
 		change_rgid(newcred, rgid);
 		setsugid(p);
 	}
 	if (sgid != (gid_t)-1 && oldcred->cr_svgid != sgid) {
 		change_svgid(newcred, sgid);
 		setsugid(p);
 	}
 	proc_set_cred(p, newcred);
 	PROC_UNLOCK(p);
 	crfree(oldcred);
 	return (0);
 
 fail:
 	PROC_UNLOCK(p);
 	crfree(newcred);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getresuid_args {
 	uid_t	*ruid;
 	uid_t	*euid;
 	uid_t	*suid;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getresuid(register struct thread *td, struct getresuid_args *uap)
 {
 	struct ucred *cred;
 	int error1 = 0, error2 = 0, error3 = 0;
 
 	cred = td->td_ucred;
 	if (uap->ruid)
 		error1 = copyout(&cred->cr_ruid,
 		    uap->ruid, sizeof(cred->cr_ruid));
 	if (uap->euid)
 		error2 = copyout(&cred->cr_uid,
 		    uap->euid, sizeof(cred->cr_uid));
 	if (uap->suid)
 		error3 = copyout(&cred->cr_svuid,
 		    uap->suid, sizeof(cred->cr_svuid));
 	return (error1 ? error1 : error2 ? error2 : error3);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getresgid_args {
 	gid_t	*rgid;
 	gid_t	*egid;
 	gid_t	*sgid;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getresgid(register struct thread *td, struct getresgid_args *uap)
 {
 	struct ucred *cred;
 	int error1 = 0, error2 = 0, error3 = 0;
 
 	cred = td->td_ucred;
 	if (uap->rgid)
 		error1 = copyout(&cred->cr_rgid,
 		    uap->rgid, sizeof(cred->cr_rgid));
 	if (uap->egid)
 		error2 = copyout(&cred->cr_groups[0],
 		    uap->egid, sizeof(cred->cr_groups[0]));
 	if (uap->sgid)
 		error3 = copyout(&cred->cr_svgid,
 		    uap->sgid, sizeof(cred->cr_svgid));
 	return (error1 ? error1 : error2 ? error2 : error3);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct issetugid_args {
 	int	dummy;
 };
 #endif
 /*
  * Report (as 1 or 0) whether the calling process has P_SUGID set.
  */
 /* ARGSUSED */
 int
 sys_issetugid(register struct thread *td, struct issetugid_args *uap)
 {
 	struct proc *p;
 	int tainted;
 
 	p = td->td_proc;
 
 	/*
 	 * Note: OpenBSD keys this off a P_SUGIDEXEC flag that is set at
 	 * execve() time.  We use P_SUGID instead because we treat a
 	 * change of owners as "tainting" too.  That matters for procs
 	 * which start as root and "become" a user without an exec -
 	 * programs cannot know *everything* that libc *might* have put
 	 * in their data segment.
 	 */
 	PROC_LOCK(p);
 	tainted = (p->p_flag & P_SUGID) != 0;
 	td->td_retval[0] = tainted;
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 /*
  * With REGRESSION configured: clear (flag 0) or set (flag 1) P_SUGID on
  * the calling process; any other flag value is EINVAL.  Without
  * REGRESSION the call always fails with ENOSYS.
  */
 int
 sys___setugid(struct thread *td, struct __setugid_args *uap)
 {
 #ifdef REGRESSION
 	struct proc *p;
 	int set;
 
 	switch (uap->flag) {
 	case 0:
 		set = 0;
 		break;
 	case 1:
 		set = 1;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	p = td->td_proc;
 	PROC_LOCK(p);
 	if (set)
 		p->p_flag |= P_SUGID;
 	else
 		p->p_flag &= ~P_SUGID;
 	PROC_UNLOCK(p);
 	return (0);
 #else /* !REGRESSION */
 
 	return (ENOSYS);
 #endif /* REGRESSION */
 }
 
 /*
  * Check if gid is a member of the group set.
  * Returns 1 when gid is cred's groups[0] or one of the remaining
  * entries, 0 otherwise.
  */
 int
 groupmember(gid_t gid, struct ucred *cred)
 {
 	int lo, hi, mid;
 
 	if (gid == cred->cr_groups[0])
 		return (1);
 
 	/*
 	 * Not the primary group: binary-search the supplemental groups
 	 * in slots [1, cr_ngroups).  This is possible because we sort
 	 * the groups in crsetgroups().  The loop narrows to the first
 	 * slot whose value is not below gid.
 	 */
 	for (lo = 1, hi = cred->cr_ngroups; lo < hi; ) {
 		mid = lo + (hi - lo) / 2;
 		if (cred->cr_groups[mid] < gid)
 			lo = mid + 1;
 		else
 			hi = mid;
 	}
 
 	/* lo may have run off the end when every entry is below gid. */
 	if (lo >= cred->cr_ngroups)
 		return (0);
 	return (cred->cr_groups[lo] == gid ? 1 : 0);
 }
 
 /*
  * Test the active securelevel against a given level.  securelevel_gt()
  * implements (securelevel > level).  securelevel_ge() implements
  * (securelevel >= level).  Note that the logic is inverted -- these
  * functions return EPERM on "success" and 0 on "failure".
  *
  * Due to care taken when setting the securelevel, we know that no jail will
  * be less secure than its parent (or the physical system), so it is
  * sufficient to test the current jail only.
  *
  * XXXRW: Possibly since this has to do with privilege, it should move to
  * kern_priv.c.
  */
 int
 securelevel_gt(struct ucred *cr, int level)
 {
 
 	if (cr->cr_prison->pr_securelevel > level)
 		return (EPERM);
 	return (0);
 }
 
 /*
  * Returns EPERM when the securelevel of cr's prison is at or above
  * 'level', 0 otherwise.
  */
 int
 securelevel_ge(struct ucred *cr, int level)
 {
 
 	if (cr->cr_prison->pr_securelevel >= level)
 		return (EPERM);
 	return (0);
 }
 
 /*
  * 'see_other_uids' determines whether or not visibility of processes
  * and sockets with credentials holding different real uids is possible
  * using a variety of system MIBs.
  *
  * It is initialized to 1 and registered read-write (CTLFLAG_RW) beneath
  * the _security_bsd sysctl parent.  cr_seeotheruids() consults it and
  * only restricts visibility when the value is 0.
  *
  * XXX: data declarations should be together near the beginning of the file.
  */
 static int	see_other_uids = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, see_other_uids, CTLFLAG_RW,
     &see_other_uids, 0,
     "Unprivileged processes may see subjects/objects with different real uid");
 
 /*-
  * Apply the 'see_other_uids' policy: decide whether u1 "can see" the
  * subject described by u2.
  * Returns: 0 for permitted, ESRCH otherwise
  * Locks: none
  * References: *u1 and *u2 must not change during the call
  *             u1 may equal u2, in which case only one reference is required
  */
 static int
 cr_seeotheruids(struct ucred *u1, struct ucred *u2)
 {
 
 	/* Policy switched off, or both share a real uid: always visible. */
 	if (see_other_uids || u1->cr_ruid == u2->cr_ruid)
 		return (0);
 
 	/* Different real uids: only a privileged u1 may look. */
 	if (priv_check_cred(u1, PRIV_SEEOTHERUIDS, 0) != 0)
 		return (ESRCH);
 	return (0);
 }
 
 /*
  * 'see_other_gids' determines whether or not visibility of processes
  * and sockets with credentials holding different real gids is possible
  * using a variety of system MIBs.
  *
  * It is initialized to 1 and registered read-write (CTLFLAG_RW) beneath
  * the _security_bsd sysctl parent.  cr_seeothergids() consults it and
  * only restricts visibility when the value is 0.
  *
  * XXX: data declarations should be together near the beginning of the file.
  */
 static int	see_other_gids = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, see_other_gids, CTLFLAG_RW,
     &see_other_gids, 0,
     "Unprivileged processes may see subjects/objects with different real gid");
 
 /*
  * Apply the 'see_other_gids' policy: decide whether u1 can "see" the
  * subject described by u2.  The two are considered related when any of
  * u1's groups is a member of u2's group set.
  * Returns: 0 for permitted, ESRCH otherwise
  * Locks: none
  * References: *u1 and *u2 must not change during the call
  *             u1 may equal u2, in which case only one reference is required
  */
 static int
 cr_seeothergids(struct ucred *u1, struct ucred *u2)
 {
 	int idx;
 
 	/* Policy switched off: always visible. */
 	if (see_other_gids)
 		return (0);
 
 	/* One shared group is enough. */
 	for (idx = 0; idx < u1->cr_ngroups; idx++) {
 		if (groupmember(u1->cr_groups[idx], u2))
 			return (0);
 	}
 
 	/* No group in common: only a privileged u1 may look. */
 	if (priv_check_cred(u1, PRIV_SEEOTHERGIDS, 0) != 0)
 		return (ESRCH);
 	return (0);
 }
 
 /*-
  * Determine if u1 "can see" the subject specified by u2.  The checks run
  * in order -- prison, MAC (when configured), see_other_uids, then
  * see_other_gids -- and the first failure is returned.
  * Returns: 0 for permitted, an errno value otherwise
  * Locks: none
  * References: *u1 and *u2 must not change during the call
  *             u1 may equal u2, in which case only one reference is required
  */
 int
 cr_cansee(struct ucred *u1, struct ucred *u2)
 {
 	int error;
 
 	error = prison_check(u1, u2);
 	if (error != 0)
 		return (error);
 #ifdef MAC
 	error = mac_cred_check_visible(u1, u2);
 	if (error != 0)
 		return (error);
 #endif
 	error = cr_seeotheruids(u1, u2);
 	if (error != 0)
 		return (error);
 	return (cr_seeothergids(u1, u2));
 }
 
 /*-
  * Determine if td "can see" the subject specified by p.  This is a thin
  * wrapper that applies cr_cansee() to the two credentials.
  * Returns: 0 for permitted, an errno value otherwise
  * Locks: Sufficient locks to protect p->p_ucred must be held.  td really
  *        should be curthread.
  * References: td and p must be valid for the lifetime of the call
  */
 int
 p_cansee(struct thread *td, struct proc *p)
 {
 	struct ucred *subject, *object;
 
 	KASSERT(td == curthread, ("%s: td not curthread", __func__));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	subject = td->td_ucred;
 	object = p->p_ucred;
 	return (cr_cansee(subject, object));
 }
 
 /*
  * 'conservative_signals' prevents the delivery of a broad class of
  * signals by unprivileged processes to processes that have changed their
  * credentials since the last invocation of execve().  This can prevent
  * the leakage of cached information or retained privileges as a result
  * of a common class of signal-related vulnerabilities.  However, this
  * may interfere with some applications that expect to be able to
  * deliver these signals to peer processes after having given up
  * privilege.
  *
  * It is initialized to 1 and registered read-write (CTLFLAG_RW) beneath
  * the _security_bsd sysctl parent.  cr_cansignal() consults it together
  * with the target's P_SUGID flag.
  */
 static int	conservative_signals = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, conservative_signals, CTLFLAG_RW,
     &conservative_signals, 0, "Unprivileged processes prevented from "
     "sending certain signals to processes whose credentials have changed");
 /*-
  * Determine whether cred may deliver the specified signal to proc.
  * Returns: 0 for permitted, an errno value otherwise.
  * Locks: A lock must be held for proc.
  * References: cred and proc must be valid for the lifetime of the call.
  */
 int
 cr_cansignal(struct ucred *cred, struct proc *proc, int signum)
 {
 	int error;
 
 	PROC_LOCK_ASSERT(proc, MA_OWNED);
 	/*
 	 * Jail semantics limit the scope of signalling to proc in the
 	 * same jail as cred, if cred is in jail.
 	 */
 	error = prison_check(cred, proc->p_ucred);
 	if (error)
 		return (error);
 #ifdef MAC
 	if ((error = mac_proc_check_signal(cred, proc, signum)))
 		return (error);
 #endif
 	if ((error = cr_seeotheruids(cred, proc->p_ucred)))
 		return (error);
 	if ((error = cr_seeothergids(cred, proc->p_ucred)))
 		return (error);
 
 	/*
 	 * UNIX signal semantics depend on the status of the P_SUGID
 	 * bit on the target process.  If the bit is set, then additional
 	 * restrictions are placed on the set of available signals.
 	 */
 	if (conservative_signals && (proc->p_flag & P_SUGID)) {
 		switch (signum) {
 		case 0:
 		case SIGKILL:
 		case SIGINT:
 		case SIGTERM:
 		case SIGALRM:
 		case SIGSTOP:
 		case SIGTTIN:
 		case SIGTTOU:
 		case SIGTSTP:
 		case SIGHUP:
 		case SIGUSR1:
 		case SIGUSR2:
 			/*
 			 * Generally, permit job and terminal control
 			 * signals.
 			 */
 			break;
 		default:
 			/* Not permitted without privilege. */
 			error = priv_check_cred(cred, PRIV_SIGNAL_SUGID, 0);
 			if (error)
 				return (error);
 		}
 	}
 
 	/*
 	 * Generally, the target credential's ruid or svuid must match the
 	 * subject credential's ruid or euid.
 	 */
 	if (cred->cr_ruid != proc->p_ucred->cr_ruid &&
 	    cred->cr_ruid != proc->p_ucred->cr_svuid &&
 	    cred->cr_uid != proc->p_ucred->cr_ruid &&
 	    cred->cr_uid != proc->p_ucred->cr_svuid) {
 		error = priv_check_cred(cred, PRIV_SIGNAL_DIFFCRED, 0);
 		if (error)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*-
  * Determine whether td may deliver the specified signal to p.
  * Returns: 0 for permitted, an errno value otherwise
  * Locks: Sufficient locks to protect various components of td and p
  *        must be held.  td must be curthread, and a lock must be
  *        held for p.
  * References: td and p must be valid for the lifetime of the call
  */
 int
 p_cansignal(struct thread *td, struct proc *p, int signum)
 {
 	struct proc *self;
 
 	KASSERT(td == curthread, ("%s: td not curthread", __func__));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/* A process may always signal itself. */
 	self = td->td_proc;
 	if (self == p)
 		return (0);
 
 	/*
 	 * UNIX signalling semantics require that processes in the same
 	 * session always be able to deliver SIGCONT to one another,
 	 * overriding the remaining protections.
 	 */
 	/* XXX: This will require an additional lock of some sort. */
 	if (signum == SIGCONT && self->p_session == p->p_session)
 		return (0);
 
 	/*
 	 * Some compat layers use SIGTHR and higher signals for
 	 * communication between different kernel threads of the same
 	 * process, so that they expect that it's always possible to
 	 * deliver them, even for suid applications where cr_cansignal() can
 	 * deny such ability for security consideration.  It should be
 	 * pretty safe to do since the only way to create two processes
 	 * with the same p_leader is via rfork(2).
 	 */
 	if (self->p_leader != NULL &&
 	    signum >= SIGTHR && signum < SIGTHR + 4 &&
 	    self->p_leader == p->p_leader)
 		return (0);
 
 	/* Everything else goes through the credential-based check. */
 	return (cr_cansignal(td->td_ucred, p, signum));
 }
 
 /*-
  * Determine whether td may reschedule p.
  * Returns: 0 for permitted, an errno value otherwise
  * Locks: Sufficient locks to protect various components of td and p
  *        must be held.  td must be curthread, and a lock must
  *        be held for p.
  * References: td and p must be valid for the lifetime of the call
  */
 int
 p_cansched(struct thread *td, struct proc *p)
 {
 	int error;
 
 	KASSERT(td == curthread, ("%s: td not curthread", __func__));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	if (td->td_proc == p)
 		return (0);
 	if ((error = prison_check(td->td_ucred, p->p_ucred)))
 		return (error);
 #ifdef MAC
 	if ((error = mac_proc_check_sched(td->td_ucred, p)))
 		return (error);
 #endif
 	if ((error = cr_seeotheruids(td->td_ucred, p->p_ucred)))
 		return (error);
 	if ((error = cr_seeothergids(td->td_ucred, p->p_ucred)))
 		return (error);
 	if (td->td_ucred->cr_ruid != p->p_ucred->cr_ruid &&
 	    td->td_ucred->cr_uid != p->p_ucred->cr_ruid) {
 		error = priv_check(td, PRIV_SCHED_DIFFCRED);
 		if (error)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * The 'unprivileged_proc_debug' flag may be used to disable a variety of
  * unprivileged inter-process debugging services, including some procfs
  * functionality, ptrace(), and ktrace().  In the past, inter-process
  * debugging has been involved in a variety of security problems, and sites
  * not requiring the service might choose to disable it when hardening
  * systems.
  *
  * The flag defaults to 1 (enabled) and is exported read-write as the
  * security.bsd.unprivileged_proc_debug sysctl.  When it is zero,
  * p_candebug() requires PRIV_DEBUG_UNPRIV before any other check.
  *
  * XXX: Should modifying and reading this variable require locking?
  * XXX: data declarations should be together near the beginning of the file.
  */
 static int	unprivileged_proc_debug = 1;
 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_proc_debug, CTLFLAG_RW,
     &unprivileged_proc_debug, 0,
     "Unprivileged processes may use process debugging facilities");
 
 /*-
  * Determine whether td may debug p.
  * Returns: 0 for permitted, an errno value otherwise
  * Locks: Sufficient locks to protect various components of td and p
  *        must be held.  td must be curthread, and a lock must
  *        be held for p.
  * References: td and p must be valid for the lifetime of the call
  *
  * The checks run in a fixed order and the first failure is returned:
  * the unprivileged_proc_debug knob, jail, MAC (if compiled in),
  * see-other-uids/gids, credential subset, P_SUGID, init/securelevel,
  * exec in progress, and finally the P2_NOTRACE opt-out.
  */
 int
 p_candebug(struct thread *td, struct proc *p)
 {
 	int credentialchanged, error, grpsubset, i, uidsubset;
 
 	KASSERT(td == curthread, ("%s: td not curthread", __func__));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * Note that the global knob is consulted before the "debugging
 	 * myself" shortcut below, so it applies to self-debugging as well.
 	 */
 	if (!unprivileged_proc_debug) {
 		error = priv_check(td, PRIV_DEBUG_UNPRIV);
 		if (error)
 			return (error);
 	}
 	if (td->td_proc == p)
 		return (0);
 	if ((error = prison_check(td->td_ucred, p->p_ucred)))
 		return (error);
 #ifdef MAC
 	if ((error = mac_proc_check_debug(td->td_ucred, p)))
 		return (error);
 #endif
 	if ((error = cr_seeotheruids(td->td_ucred, p->p_ucred)))
 		return (error);
 	if ((error = cr_seeothergids(td->td_ucred, p->p_ucred)))
 		return (error);
 
 	/*
 	 * Is p's group set a subset of td's effective group set?  This
 	 * includes p's egid, group access list, rgid, and svgid.
 	 */
 	grpsubset = 1;
 	for (i = 0; i < p->p_ucred->cr_ngroups; i++) {
 		if (!groupmember(p->p_ucred->cr_groups[i], td->td_ucred)) {
 			grpsubset = 0;
 			break;
 		}
 	}
 	grpsubset = grpsubset &&
 	    groupmember(p->p_ucred->cr_rgid, td->td_ucred) &&
 	    groupmember(p->p_ucred->cr_svgid, td->td_ucred);
 
 	/*
 	 * Are the uids present in p's credential equal to td's
 	 * effective uid?  This includes p's euid, svuid, and ruid.
 	 */
 	uidsubset = (td->td_ucred->cr_uid == p->p_ucred->cr_uid &&
 	    td->td_ucred->cr_uid == p->p_ucred->cr_svuid &&
 	    td->td_ucred->cr_uid == p->p_ucred->cr_ruid);
 
 	/*
 	 * Has the credential of the process changed since the last exec()?
 	 */
 	credentialchanged = (p->p_flag & P_SUGID);
 
 	/*
 	 * If p's gids aren't a subset, or the uids aren't a subset,
 	 * require PRIV_DEBUG_DIFFCRED for td to debug p.
 	 */
 	if (!grpsubset || !uidsubset) {
 		error = priv_check(td, PRIV_DEBUG_DIFFCRED);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * A changed credential is checked separately and requires its own
 	 * privilege, PRIV_DEBUG_SUGID.
 	 */
 	if (credentialchanged) {
 		error = priv_check(td, PRIV_DEBUG_SUGID);
 		if (error)
 			return (error);
 	}
 
 	/* Can't trace init when securelevel > 0. */
 	if (p == initproc) {
 		error = securelevel_gt(td->td_ucred, 0);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Can't trace a process that's currently exec'ing.
 	 *
 	 * XXX: Note, this is not a security policy decision, it's a
 	 * basic correctness/functionality decision.  Therefore, this check
 	 * should be moved to the callers of p_candebug().
 	 */
 	if ((p->p_flag & P_INEXEC) != 0)
 		return (EBUSY);
 
 	/* Tracing was explicitly denied for p (P2_NOTRACE). */
 	if ((p->p_flag2 & P2_NOTRACE) != 0) {
 		error = priv_check(td, PRIV_DEBUG_DENIED);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*-
  * Determine whether the subject represented by cred can "see" a socket.
  * Returns: 0 for permitted, ENOENT otherwise.
  */
 int
 cr_canseesocket(struct ucred *cred, struct socket *so)
 {
 	int error;
 
 	error = prison_check(cred, so->so_cred);
 	if (error)
 		return (ENOENT);
 #ifdef MAC
 	error = mac_socket_check_visible(cred, so);
 	if (error)
 		return (error);
 #endif
 	if (cr_seeotheruids(cred, so->so_cred))
 		return (ENOENT);
 	if (cr_seeothergids(cred, so->so_cred))
 		return (ENOENT);
 
 	return (0);
 }
 
 #if defined(INET) || defined(INET6)
 /*-
  * Decide whether the subject credential cred is allowed to "see" the
  * protocol control block inp.
  * Returns: 0 if visible; ENOENT if hidden by the jail or see-other-uids/gids
  *          policies; the MAC framework's own error if MAC denies it.
  * Locks: with MAC compiled in, the inpcb lock is asserted before the MAC
  *        check.
  */
 int
 cr_canseeinpcb(struct ucred *cred, struct inpcb *inp)
 {
 	struct ucred *owner;
 	int error;
 
 	owner = inp->inp_cred;
 
 	/* Jail mismatch: report the pcb as non-existent. */
 	error = prison_check(cred, owner);
 	if (error != 0)
 		return (ENOENT);
 #ifdef MAC
 	INP_LOCK_ASSERT(inp);
 	error = mac_inpcb_check_visible(cred, inp);
 	if (error != 0)
 		return (error);
 #endif
 	/* Pcbs of other users/groups are hidden when policy says so. */
 	if (cr_seeotheruids(cred, owner) != 0 ||
 	    cr_seeothergids(cred, owner) != 0)
 		return (ENOENT);
 
 	return (0);
 }
 #endif
 
 /*-
  * Determine whether td can wait for the exit of p.
  * Returns: 0 for permitted, an errno value otherwise
  * Locks: Sufficient locks to protect various components of td and p
  *        must be held.  td must be curthread, and a lock must
  *        be held for p.
  * References: td and p must be valid for the lifetime of the call
 
  */
 int
 p_canwait(struct thread *td, struct proc *p)
 {
 	int error;
 
 	KASSERT(td == curthread, ("%s: td not curthread", __func__));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	if ((error = prison_check(td->td_ucred, p->p_ucred)))
 		return (error);
 #ifdef MAC
 	if ((error = mac_proc_check_wait(td->td_ucred, p)))
 		return (error);
 #endif
 #if 0
 	/* XXXMAC: This could have odd effects on some shells. */
 	if ((error = cr_seeotheruids(td->td_ucred, p->p_ucred)))
 		return (error);
 #endif
 
 	return (0);
 }
 
 /*
  * Allocate a new credential.  The structure is zero-filled, carries one
  * reference, and its group pointer is aimed at the embedded small-groups
  * array.  The allocation uses M_WAITOK.
  */
 struct ucred *
 crget(void)
 {
 	struct ucred *newcr;
 
 	newcr = malloc(sizeof(*newcr), M_CRED, M_WAITOK | M_ZERO);
 	refcount_init(&newcr->cr_ref, 1);
 #ifdef AUDIT
 	audit_cred_init(newcr);
 #endif
 #ifdef MAC
 	mac_cred_init(newcr);
 #endif
 	/* Start with the inline array; crextend() replaces it on demand. */
 	newcr->cr_groups = newcr->cr_smallgroups;
 	newcr->cr_agroups =
 	    sizeof(newcr->cr_smallgroups) / sizeof(newcr->cr_smallgroups[0]);
 	return (newcr);
 }
 
 /*
  * Claim another reference to a ucred structure.
  * Returns the same pointer that was passed in, so the call can be used
  * directly in an assignment.
  */
 struct ucred *
 crhold(struct ucred *cr)
 {
 
 	refcount_acquire(&cr->cr_ref);
 	return (cr);
 }
 
 /*
  * Drop one reference to a credential.  When the last reference goes away
  * the attached uidinfo, prison, login class, audit and MAC state and any
  * separately allocated group array are released, followed by the
  * credential itself.
  */
 void
 crfree(struct ucred *cr)
 {
 
 	KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
 	KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
 
 	/* Other holders remain: nothing more to do. */
 	if (!refcount_release(&cr->cr_ref))
 		return;
 
 	/*
 	 * Temporary credentials obtained from crget() (nfs_statfs() is
 	 * one such caller) may never have had uidinfo attached, hence
 	 * the NULL checks.
 	 */
 	if (cr->cr_uidinfo != NULL)
 		uifree(cr->cr_uidinfo);
 	if (cr->cr_ruidinfo != NULL)
 		uifree(cr->cr_ruidinfo);
 
 	/* Release the prison and login class references, if present. */
 	if (cr->cr_prison != NULL)
 		prison_free(cr->cr_prison);
 	if (cr->cr_loginclass != NULL)
 		loginclass_free(cr->cr_loginclass);
 #ifdef AUDIT
 	audit_cred_destroy(cr);
 #endif
 #ifdef MAC
 	mac_cred_destroy(cr);
 #endif
 	/* The embedded small array is part of cr and is freed with it. */
 	if (cr->cr_groups != cr->cr_smallgroups)
 		free(cr->cr_groups, M_CRED);
 	free(cr, M_CRED);
 }
 
 /*
  * Fill dest with the contents of the template src.  dest must be
  * unshared (exactly one reference).  The cr_startcopy..cr_endcopy region
  * is copied wholesale, the groups are installed via crsetgroups(), and a
  * reference is taken on every shared object dest now points at.
  *
  * NOTE(review): crsetgroups() calls crextend(), which allocates with
  * M_WAITOK when dest's group array is too small, so this may sleep
  * unless the caller has pre-extended dest.
  */
 void
 crcopy(struct ucred *dest, struct ucred *src)
 {
 	unsigned copylen;
 
 	KASSERT(dest->cr_ref == 1, ("crcopy of shared ucred"));
 
 	copylen = (unsigned)((caddr_t)&src->cr_endcopy -
 	    (caddr_t)&src->cr_startcopy);
 	bcopy(&src->cr_startcopy, &dest->cr_startcopy, copylen);
 	crsetgroups(dest, src->cr_ngroups, src->cr_groups);
 
 	/* The pointers were copied above; now account for them. */
 	uihold(dest->cr_uidinfo);
 	uihold(dest->cr_ruidinfo);
 	prison_hold(dest->cr_prison);
 	loginclass_hold(dest->cr_loginclass);
 #ifdef AUDIT
 	audit_cred_copy(src, dest);
 #endif
 #ifdef MAC
 	mac_cred_copy(src, dest);
 #endif
 }
 
 /*
  * Return a freshly allocated credential that is a copy of cr.  The
  * result comes from crget() and is filled in by crcopy().
  */
 struct ucred *
 crdup(struct ucred *cr)
 {
 	struct ucred *copy;
 
 	copy = crget();
 	crcopy(copy, cr);
 	return (copy);
 }
 
 /*
  * Export a kernel credential into a struct xucred.  The destination
  * is zeroed first; only the version, effective uid and at most
  * XU_NGROUPS groups are filled in.
  */
 void
 cru2x(struct ucred *cr, struct xucred *xcr)
 {
 	int ncopy;
 
 	bzero(xcr, sizeof(*xcr));
 	xcr->cr_version = XUCRED_VERSION;
 	xcr->cr_uid = cr->cr_uid;
 
 	/* Clamp the group count to what struct xucred can hold. */
 	ncopy = cr->cr_ngroups;
 	if (ncopy > XU_NGROUPS)
 		ncopy = XU_NGROUPS;
 	xcr->cr_ngroups = ncopy;
 	bcopy(cr->cr_groups, xcr->cr_groups, ncopy * sizeof(*cr->cr_groups));
 }
 
 /*
- * small routine to swap a thread's current ucred for the correct one taken
- * from the process.
- */
-void
-cred_update_thread(struct thread *td)
-{
-	struct proc *p;
-	struct ucred *cred;
-
-	p = td->td_proc;
-	cred = td->td_ucred;
-	PROC_LOCK_ASSERT(p, MA_OWNED);
-	td->td_ucred = crhold(p->p_ucred);
-	if (cred != NULL)
-		crfree(cred);
-}
-
-/*
  * Set initial process credentials.
  * Callers are responsible for providing the reference for provided credentials.
  */
 void
 proc_set_cred_init(struct proc *p, struct ucred *newcred)
 {
 
 	/*
 	 * Plain store: no lock is asserted and no reference is taken
 	 * here, so the caller's reference is handed over to the process.
 	 */
 	p->p_ucred = newcred;
 }
 
 /*
  * Replace the credentials of a process and hand back the previous ones.
  * The caller supplies the reference for newcred and is responsible for
  * releasing the returned credential.
  *
  * The process must already have credentials.  Its lock must be held
  * unless newcred is NULL; that case is only valid for a zombie, which is
  * about to be freed and should no longer be visible.
  */
 struct ucred *
 proc_set_cred(struct proc *p, struct ucred *newcred)
 {
 	struct ucred *prev;
 
 	MPASS(p->p_ucred != NULL);
 	if (newcred != NULL) {
 		PROC_LOCK_ASSERT(p, MA_OWNED);
 	} else {
 		MPASS(p->p_state == PRS_ZOMBIE);
 	}
 
 	prev = p->p_ucred;
 	p->p_ucred = newcred;
 	/* The COW state is only updated when real credentials go in. */
 	if (newcred != NULL)
 		PROC_UPDATE_COW(p);
 	return (prev);
 }
 
 /*
  * Copy p's current credential into cr and return p's credential.
  * Called and returns with the proc lock held.  The lock is dropped around
  * crextend() whenever cr's group array is too small, so p's credential
  * is re-read after every extension.
  */
 struct ucred *
 crcopysafe(struct proc *p, struct ucred *cr)
 {
 	struct ucred *cur;
 	int needed;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	for (;;) {
 		cur = p->p_ucred;
 		if (cr->cr_agroups >= cur->cr_agroups)
 			break;
 		/* Remember the size before letting go of the lock. */
 		needed = cur->cr_agroups;
 		PROC_UNLOCK(p);
 		crextend(cr, needed);
 		PROC_LOCK(p);
 	}
 	crcopy(cr, cur);
 
 	return (cur);
 }
 
 /*
  * Make sure cr's group array has room for at least n entries.  The array
  * never shrinks.  When it grows, the old contents are NOT preserved: the
  * previous array is released (unless it is the embedded one) and a
  * zeroed replacement is allocated with M_WAITOK.
  */
 static void
 crextend(struct ucred *cr, int n)
 {
 	int newcnt;
 
 	/* Already big enough. */
 	if (cr->cr_agroups >= n)
 		return;
 
 	/*
 	 * Below one page worth of gids the size is doubled, because the
 	 * allocator hands out powers of two in that range.  From one page
 	 * on, round up to whole pages only: needing a non-power-of-two
 	 * page count seems more likely than a process adding thousands
 	 * of groups one at a time.
 	 */
 	if (n >= PAGE_SIZE / sizeof(gid_t)) {
 		newcnt = roundup2(n, PAGE_SIZE / sizeof(gid_t));
 	} else {
 		if (cr->cr_agroups == 0)
 			newcnt = MINALLOCSIZE / sizeof(gid_t);
 		else
 			newcnt = cr->cr_agroups * 2;
 
 		while (newcnt < n)
 			newcnt *= 2;
 	}
 
 	/* Drop a previously allocated array; the inline one stays. */
 	if (cr->cr_groups != cr->cr_smallgroups)
 		free(cr->cr_groups, M_CRED);
 
 	cr->cr_groups = malloc(newcnt * sizeof(gid_t), M_CRED,
 	    M_WAITOK | M_ZERO);
 	cr->cr_agroups = newcnt;
 }
 
 /*
  * Copy groups in to a credential, preserving any necessary invariants.
  * Currently this includes the sorting of all supplemental gids.
  * crextend() must have been called before hand to ensure sufficient
  * space is available; this is asserted against the allocated size
  * (cr_agroups).
  *
  * cr:     credential to update; cr_groups and cr_ngroups are overwritten
  * ngrp:   number of entries in groups
  * groups: source array; groups[0] is kept in slot 0 and is not sorted
  */
 static void
 crsetgroups_locked(struct ucred *cr, int ngrp, gid_t *groups)
 {
 	int i;
 	int j;
 	gid_t g;
 
 	/* The check is on the capacity, so name the right field. */
 	KASSERT(cr->cr_agroups >= ngrp, ("cr_agroups is too small"));
 
 	bcopy(groups, cr->cr_groups, ngrp * sizeof(gid_t));
 	cr->cr_ngroups = ngrp;
 
 	/*
 	 * Sort all groups except cr_groups[0] to allow groupmember to
 	 * perform a binary search.  This is an insertion sort over
 	 * indices 1..ngrp-1.
 	 *
 	 * XXX: If large numbers of groups become common this should
 	 * be replaced with shell sort like linux uses or possibly
 	 * heap sort.
 	 */
 	for (i = 2; i < ngrp; i++) {
 		g = cr->cr_groups[i];
 		for (j = i-1; j >= 1 && g < cr->cr_groups[j]; j--)
 			cr->cr_groups[j + 1] = cr->cr_groups[j];
 		cr->cr_groups[j + 1] = g;
 	}
 }
 
 /*
  * Install a group list in a credential, growing the credential's group
  * array first if necessary.  Lists longer than (ngroups_max + 1) entries
  * are silently truncated to that length.
  */
 void
 crsetgroups(struct ucred *cr, int ngrp, gid_t *groups)
 {
 	int limit;
 
 	limit = ngroups_max + 1;
 	if (ngrp > limit)
 		ngrp = limit;
 
 	crextend(cr, ngrp);
 	crsetgroups_locked(cr, ngrp, groups);
 }
 
 /*
  * getlogin system call: copy the session's login name out to userland.
  * The requested length is clamped to MAXLOGNAME.  Returns ERANGE when the
  * name plus its terminating NUL does not fit in the (clamped) buffer,
  * otherwise the result of copyout().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getlogin_args {
 	char	*namebuf;
 	u_int	namelen;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getlogin(struct thread *td, struct getlogin_args *uap)
 {
 	struct proc *p;
 	char login[MAXLOGNAME];
 	size_t len;
 
 	p = td->td_proc;
 	if (uap->namelen > MAXLOGNAME)
 		uap->namelen = MAXLOGNAME;
 
 	/* Snapshot the name under the proc and session locks. */
 	PROC_LOCK(p);
 	SESS_LOCK(p->p_session);
 	len = strlcpy(login, p->p_session->s_login, uap->namelen);
 	SESS_UNLOCK(p->p_session);
 	PROC_UNLOCK(p);
 
 	/* Account for the terminating NUL. */
 	len++;
 	if (len > uap->namelen)
 		return (ERANGE);
 	return (copyout(login, uap->namebuf, len));
 }
 
 /*
  * setlogin system call: replace the session's login name.  Requires
  * PRIV_PROC_SETLOGIN.  A name that does not fit in MAXLOGNAME bytes
  * (including the NUL) is rejected with EINVAL.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct setlogin_args {
 	char	*namebuf;
 };
 #endif
 /* ARGSUSED */
 int
 sys_setlogin(struct thread *td, struct setlogin_args *uap)
 {
 	char logintmp[MAXLOGNAME];
 	struct proc *p = td->td_proc;
 	int error;
 
 	/* The strcpy() below relies on s_login being at least this big. */
 	CTASSERT(sizeof(p->p_session->s_login) >= sizeof(logintmp));
 
 	error = priv_check(td, PRIV_PROC_SETLOGIN);
 	if (error != 0)
 		return (error);
 
 	error = copyinstr(uap->namebuf, logintmp, sizeof(logintmp), NULL);
 	if (error == ENAMETOOLONG)
 		return (EINVAL);
 	if (error != 0)
 		return (error);
 
 	PROC_LOCK(p);
 	SESS_LOCK(p->p_session);
 	strcpy(p->p_session->s_login, logintmp);
 	SESS_UNLOCK(p->p_session);
 	PROC_UNLOCK(p);
 	return (0);
 }
 
 /*
  * Mark process p as having changed credentials (P_SUGID).  Unless
  * PF_ISUGID is set in p_pfsflags, the p_stops mask is cleared as well.
  * The proc lock must be held.
  */
 void
 setsugid(struct proc *p)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	p->p_flag |= P_SUGID;
 	if ((p->p_pfsflags & PF_ISUGID) == 0)
 		p->p_stops = 0;
 }
 
 /*-
  * Set the effective uid of a credential.
  * Side effects: newcred->cr_uid and newcred->cr_uidinfo are replaced; a
  *               reference is taken on euip and the reference on the
  *               previous uidinfo is dropped.
  * References: newcred must be an exclusive credential reference for the
  *             duration of the call.
  */
 void
 change_euid(struct ucred *newcred, struct uidinfo *euip)
 {
 	struct uidinfo *old;
 
 	old = newcred->cr_uidinfo;
 	/* Take the new reference before releasing the old one. */
 	uihold(euip);
 	newcred->cr_uid = euip->ui_uid;
 	newcred->cr_uidinfo = euip;
 	uifree(old);
 }
 
 /*-
  * Change a process's effective gid.
  * Side effects: the effective gid is stored in the first slot of the
  *               group array, newcred->cr_groups[0].
  * References: newcred must be an exclusive credential reference for the
  *             duration of the call.
  */
 void
 change_egid(struct ucred *newcred, gid_t egid)
 {
 
 	newcred->cr_groups[0] = egid;
 }
 
 /*-
  * Set the real uid of a credential.
  * Side effects: newcred->cr_ruid and newcred->cr_ruidinfo are replaced;
  *               the process count of the previous real-uid uidinfo is
  *               decremented and that of ruip incremented; a reference
  *               is taken on ruip and the old reference is dropped.
  * References: newcred must be an exclusive credential reference for the
  *             duration of the call.
  */
 void
 change_ruid(struct ucred *newcred, struct uidinfo *ruip)
 {
 	struct uidinfo *old;
 
 	old = newcred->cr_ruidinfo;
 
 	/* Uncharge the old owner while its uidinfo is still referenced. */
 	(void)chgproccnt(old, -1, 0);
 	uihold(ruip);
 	newcred->cr_ruid = ruip->ui_uid;
 	newcred->cr_ruidinfo = ruip;
 	uifree(old);
 	(void)chgproccnt(ruip, 1, 0);
 }
 
 /*-
  * Change a process's real gid.
  * Side effects: newcred->cr_rgid will be updated; nothing else is
  *               touched.
  * References: newcred must be an exclusive credential reference for the
  *             duration of the call.
  */
 void
 change_rgid(struct ucred *newcred, gid_t rgid)
 {
 
 	newcred->cr_rgid = rgid;
 }
 
 /*-
  * Change a process's saved uid.
  * Side effects: newcred->cr_svuid will be updated; no uidinfo
  *               accounting is performed here.
  * References: newcred must be an exclusive credential reference for the
  *             duration of the call.
  */
 void
 change_svuid(struct ucred *newcred, uid_t svuid)
 {
 
 	newcred->cr_svuid = svuid;
 }
 
 /*-
  * Change a process's saved gid.
  * Side effects: newcred->cr_svgid will be updated; nothing else is
  *               touched.
  * References: newcred must be an exclusive credential reference for the
  *             duration of the call.
  */
 void
 change_svgid(struct ucred *newcred, gid_t svgid)
 {
 
 	newcred->cr_svgid = svgid;
 }
Index: head/sys/kern/kern_resource.c
===================================================================
--- head/sys/kern/kern_resource.c	(revision 285632)
+++ head/sys/kern/kern_resource.c	(revision 285633)
@@ -1,1452 +1,1438 @@
 /*-
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_resource.c	8.5 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/file.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/refcount.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/time.h>
 #include <sys/umtx.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 
 static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
 static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
 #define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
 static struct rwlock uihashtbl_lock;
 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
 static u_long uihash;		/* size of hash table - 1 */
 
 static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
 		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
 static struct uidinfo *uilookup(uid_t uid);
 static void	ruxagg_locked(struct rusage_ext *rux, struct thread *td);
 
 static __inline int	lim_shared(struct plimit *limp);
 
 /*
  * Resource controls and accounting.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getpriority_args {
 	int	which;
 	int	who;
 };
 #endif
 /*
  * getpriority system call: report the lowest nice value among the
  * processes selected by (which, who) that the caller is allowed to see.
  * The value is returned in td_retval[0].  Returns EINVAL for an unknown
  * 'which' and ESRCH when no visible process matched.
  */
 int
 sys_getpriority(struct thread *td, register struct getpriority_args *uap)
 {
 	struct proc *p;
 	struct pgrp *pg;
 	int error, low;
 
 	error = 0;
 	/* Sentinel one past the valid range: "nothing found yet". */
 	low = PRIO_MAX + 1;
 	switch (uap->which) {
 
 	case PRIO_PROCESS:
 		if (uap->who == 0)
 			low = td->td_proc->p_nice;
 		else {
 			/*
 			 * NOTE(review): pfind() presumably returns the
 			 * process locked, since it is unlocked below
 			 * without being locked here -- confirm.
 			 */
 			p = pfind(uap->who);
 			if (p == NULL)
 				break;
 			if (p_cansee(td, p) == 0)
 				low = p->p_nice;
 			PROC_UNLOCK(p);
 		}
 		break;
 
 	case PRIO_PGRP:
 		sx_slock(&proctree_lock);
 		if (uap->who == 0) {
 			pg = td->td_proc->p_pgrp;
 			PGRP_LOCK(pg);
 		} else {
 			/*
 			 * NOTE(review): pgfind() presumably returns the
 			 * group locked, matching the explicit PGRP_LOCK()
 			 * in the other branch -- confirm.
 			 */
 			pg = pgfind(uap->who);
 			if (pg == NULL) {
 				sx_sunlock(&proctree_lock);
 				break;
 			}
 		}
 		sx_sunlock(&proctree_lock);
 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NORMAL &&
 			    p_cansee(td, p) == 0) {
 				if (p->p_nice < low)
 					low = p->p_nice;
 			}
 			PROC_UNLOCK(p);
 		}
 		PGRP_UNLOCK(pg);
 		break;
 
 	case PRIO_USER:
 		/* who == 0 means the caller's own effective uid. */
 		if (uap->who == 0)
 			uap->who = td->td_ucred->cr_uid;
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NORMAL &&
 			    p_cansee(td, p) == 0 &&
 			    p->p_ucred->cr_uid == uap->who) {
 				if (p->p_nice < low)
 					low = p->p_nice;
 			}
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	/* The sentinel survived: no matching, visible process. */
 	if (low == PRIO_MAX + 1 && error == 0)
 		error = ESRCH;
 	td->td_retval[0] = low;
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct setpriority_args {
 	int	which;
 	int	who;
 	int	prio;
 };
 #endif
 /*
  * setpriority system call: apply nice value uap->prio to every process
  * selected by (which, who) that the caller is allowed to see.  Returns
  * EINVAL for an unknown 'which' and ESRCH when nothing matched.  For the
  * group and user cases the returned error is the result of the last
  * donice() call, so an earlier failure can be overwritten by a later
  * success.
  */
 int
 sys_setpriority(struct thread *td, struct setpriority_args *uap)
 {
 	struct proc *curp, *p;
 	struct pgrp *pg;
 	int found = 0, error = 0;
 
 	curp = td->td_proc;
 	switch (uap->which) {
 	case PRIO_PROCESS:
 		if (uap->who == 0) {
 			PROC_LOCK(curp);
 			error = donice(td, curp, uap->prio);
 			PROC_UNLOCK(curp);
 		} else {
 			/*
 			 * NOTE(review): pfind() presumably returns the
 			 * process locked, since it is unlocked below
 			 * without being locked here -- confirm.
 			 */
 			p = pfind(uap->who);
 			if (p == NULL)
 				break;
 			error = p_cansee(td, p);
 			if (error == 0)
 				error = donice(td, p, uap->prio);
 			PROC_UNLOCK(p);
 		}
 		/* Counted as found even if the visibility check failed. */
 		found++;
 		break;
 
 	case PRIO_PGRP:
 		sx_slock(&proctree_lock);
 		if (uap->who == 0) {
 			pg = curp->p_pgrp;
 			PGRP_LOCK(pg);
 		} else {
 			pg = pgfind(uap->who);
 			if (pg == NULL) {
 				sx_sunlock(&proctree_lock);
 				break;
 			}
 		}
 		sx_sunlock(&proctree_lock);
 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NORMAL &&
 			    p_cansee(td, p) == 0) {
 				error = donice(td, p, uap->prio);
 				found++;
 			}
 			PROC_UNLOCK(p);
 		}
 		PGRP_UNLOCK(pg);
 		break;
 
 	case PRIO_USER:
 		/* who == 0 means the caller's own effective uid. */
 		if (uap->who == 0)
 			uap->who = td->td_ucred->cr_uid;
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (p->p_state == PRS_NORMAL &&
 			    p->p_ucred->cr_uid == uap->who &&
 			    p_cansee(td, p) == 0) {
 				error = donice(td, p, uap->prio);
 				found++;
 			}
 			PROC_UNLOCK(p);
 		}
 		sx_sunlock(&allproc_lock);
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	if (found == 0 && error == 0)
 		error = ESRCH;
 	return (error);
 }
 
 /*
  * Set "nice" for a (whole) process.
  */
 static int
 donice(struct thread *td, struct proc *p, int n)
 {
 	int error;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	if ((error = p_cansched(td, p)))
 		return (error);
 	if (n > PRIO_MAX)
 		n = PRIO_MAX;
 	if (n < PRIO_MIN)
 		n = PRIO_MIN;
 	if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0)
 		return (EACCES);
 	sched_nice(p, n);
 	return (0);
 }
 
 /*
  * security.bsd.unprivileged_idprio: when non-zero, the rtprio system calls
  * in this file skip the PRIV_SCHED_RTPRIO check for requests in the idle
  * priority class.  Zero (the initial value) keeps the check in place.
  */
 static int unprivileged_idprio;
 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_idprio, CTLFLAG_RW,
     &unprivileged_idprio, 0, "Allow non-root users to set an idle priority");
 
 /*
  * Set realtime priority for LWP.
  *
  * uap->function selects RTP_LOOKUP (copy the thread's rtprio out to
  * uap->rtp) or RTP_SET (apply the rtprio copied in from uap->rtp); any
  * other value yields EINVAL.  uap->lwpid of 0, or the caller's own tid,
  * addresses the calling thread; other ids are looked up in the current
  * process only and give ESRCH when not found.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rtprio_thread_args {
 	int		function;
 	lwpid_t		lwpid;
 	struct rtprio	*rtp;
 };
 #endif
 int
 sys_rtprio_thread(struct thread *td, struct rtprio_thread_args *uap)
 {
 	struct proc *p;
 	struct rtprio rtp;
 	struct thread *td1;
 	int cierror, error;
 
 	/* Perform copyin before acquiring locks if needed. */
 	if (uap->function == RTP_SET)
 		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
 	else
 		cierror = 0;
 
 	if (uap->lwpid == 0 || uap->lwpid == td->td_tid) {
 		p = td->td_proc;
 		td1 = td;
 		PROC_LOCK(p);
 	} else {
 		/*
 		 * Only look up thread in current process.
 		 * NOTE(review): tdfind() presumably returns with the
 		 * process locked, since p is unlocked on every path below
 		 * without being locked here -- confirm.
 		 */
 		td1 = tdfind(uap->lwpid, curproc->p_pid);
 		if (td1 == NULL)
 			return (ESRCH);
 		p = td1->td_proc;
 	}
 
 	switch (uap->function) {
 	case RTP_LOOKUP:
 		if ((error = p_cansee(td, p)))
 			break;
 		pri_to_rtp(td1, &rtp);
 		/* Drop the lock before touching user memory. */
 		PROC_UNLOCK(p);
 		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
 	case RTP_SET:
 		/* The permission error takes precedence over a copyin error. */
 		if ((error = p_cansched(td, p)) || (error = cierror))
 			break;
 
 		/* Disallow setting rtprio in most cases if not superuser. */
 
 		/*
 		 * Realtime priority has to be restricted for reasons which
 		 * should be obvious.  However, for idleprio processes, there is
 		 * a potential for system deadlock if an idleprio process gains
 		 * a lock on a resource that other processes need (and the
 		 * idleprio process can't run due to a CPU-bound normal
 		 * process).  Fix me!  XXX
 		 *
 		 * This problem is not only related to idleprio process.
 		 * A user level program can obtain a file lock and hold it
 		 * indefinitely.  Additionally, without idleprio processes it is
 		 * still conceivable that a program with low priority will never
 		 * get to run.  In short, allowing this feature might make it
 		 * easier to lock a resource indefinitely, but it is not the
 		 * only thing that makes it possible.
 		 */
 		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
 		    (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
 		    unprivileged_idprio == 0)) {
 			error = priv_check(td, PRIV_SCHED_RTPRIO);
 			if (error)
 				break;
 		}
 		error = rtp_to_pri(&rtp, td1);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Set realtime priority.
  *
  * uap->function selects RTP_LOOKUP (copy an rtprio out to uap->rtp) or
  * RTP_SET (apply the rtprio copied in from uap->rtp); any other value
  * yields EINVAL.  uap->pid of 0 addresses the calling thread only, any
  * other pid addresses the whole process and gives ESRCH when the process
  * is not found.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rtprio_args {
 	int		function;
 	pid_t		pid;
 	struct rtprio	*rtp;
 };
 #endif
 int
 sys_rtprio(struct thread *td, register struct rtprio_args *uap)
 {
 	struct proc *p;
 	struct thread *tdp;
 	struct rtprio rtp;
 	int cierror, error;
 
 	/* Perform copyin before acquiring locks if needed. */
 	if (uap->function == RTP_SET)
 		cierror = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
 	else
 		cierror = 0;
 
 	if (uap->pid == 0) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 	} else {
 		/*
 		 * NOTE(review): pfind() presumably returns the process
 		 * locked, since p is unlocked on every path below without
 		 * being locked here -- confirm.
 		 */
 		p = pfind(uap->pid);
 		if (p == NULL)
 			return (ESRCH);
 	}
 
 	switch (uap->function) {
 	case RTP_LOOKUP:
 		if ((error = p_cansee(td, p)))
 			break;
 		/*
 		 * Return OUR priority if no pid specified,
 		 * or if one is, report the highest priority
 		 * in the process.  There isn't much more you can do as
 		 * there is only room to return a single priority.
 		 * Note: specifying our own pid is not the same
 		 * as leaving it zero.
 		 */
 		if (uap->pid == 0) {
 			pri_to_rtp(td, &rtp);
 		} else {
 			struct rtprio rtp2;
 
 			/* Start from the weakest value and improve on it. */
 			rtp.type = RTP_PRIO_IDLE;
 			rtp.prio = RTP_PRIO_MAX;
 			FOREACH_THREAD_IN_PROC(p, tdp) {
 				pri_to_rtp(tdp, &rtp2);
 				if (rtp2.type <  rtp.type ||
 				    (rtp2.type == rtp.type &&
 				    rtp2.prio < rtp.prio)) {
 					rtp.type = rtp2.type;
 					rtp.prio = rtp2.prio;
 				}
 			}
 		}
 		/* Drop the lock before touching user memory. */
 		PROC_UNLOCK(p);
 		return (copyout(&rtp, uap->rtp, sizeof(struct rtprio)));
 	case RTP_SET:
 		/* The permission error takes precedence over a copyin error. */
 		if ((error = p_cansched(td, p)) || (error = cierror))
 			break;
 
 		/*
 		 * Disallow setting rtprio in most cases if not superuser.
 		 * See the comment in sys_rtprio_thread about idprio
 		 * threads holding a lock.
 		 */
 		if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME ||
 		    (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_IDLE &&
 		    !unprivileged_idprio)) {
 			error = priv_check(td, PRIV_SCHED_RTPRIO);
 			if (error)
 				break;
 		}
 
 		/*
 		 * If we are setting our own priority, set just our
 		 * thread but if we are doing another process,
 		 * do all the threads on that process. If we
 		 * specify our own pid we do the latter.
 		 */
 		if (uap->pid == 0) {
 			error = rtp_to_pri(&rtp, td);
 		} else {
 			/*
 			 * Iterate with tdp, as the lookup branch does,
 			 * so that td keeps referring to the calling
 			 * thread for the rest of the function.
 			 */
 			FOREACH_THREAD_IN_PROC(p, tdp) {
 				if ((error = rtp_to_pri(&rtp, tdp)) != 0)
 					break;
 			}
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Apply the scheduling class and priority described by rtp to thread td.
  * Returns EINVAL when the class is unknown or rtp->prio is out of range
  * for the class, 0 otherwise.  The thread lock is taken and released
  * here.
  */
 int
 rtp_to_pri(struct rtprio *rtp, struct thread *td)
 {
 	u_char  newpri, oldclass, oldpri;
 
 	/* Validate the request and map it to a user priority first. */
 	switch (RTP_PRIO_BASE(rtp->type)) {
 	case RTP_PRIO_REALTIME:
 		if (rtp->prio > RTP_PRIO_MAX)
 			return (EINVAL);
 		newpri = PRI_MIN_REALTIME + rtp->prio;
 		break;
 	case RTP_PRIO_NORMAL:
 		if (rtp->prio > (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE))
 			return (EINVAL);
 		newpri = PRI_MIN_TIMESHARE + rtp->prio;
 		break;
 	case RTP_PRIO_IDLE:
 		if (rtp->prio > RTP_PRIO_MAX)
 			return (EINVAL);
 		newpri = PRI_MIN_IDLE + rtp->prio;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	thread_lock(td);
 	oldclass = td->td_pri_class;
 	sched_class(td, rtp->type);	/* XXX fix */
 	oldpri = td->td_user_pri;
 	sched_user_prio(td, newpri);
 	/*
 	 * Propagate to the active priority only when the user priority
 	 * really changed and the thread was not, or is no longer, in the
 	 * normal class.
 	 */
 	if (td->td_user_pri != oldpri && (oldclass != RTP_PRIO_NORMAL ||
 	    td->td_pri_class != RTP_PRIO_NORMAL))
 		sched_prio(td, td->td_user_pri);
 	/*
 	 * umtx_pi_adjust() is called with the thread lock dropped; the
 	 * critical section spans the unlock and the call.
 	 */
 	if (TD_ON_UPILOCK(td) && oldpri != newpri) {
 		critical_enter();
 		thread_unlock(td);
 		umtx_pi_adjust(td, oldpri);
 		critical_exit();
 	} else
 		thread_unlock(td);
 	return (0);
 }
 
 /*
  * Describe thread td's scheduling class and base user priority as a
  * struct rtprio.  rtp->type is always set to the thread's priority class;
  * rtp->prio is the offset of td_base_user_pri within the class range for
  * the realtime, timeshare and idle classes, and 0 for any other class.
  * The thread lock is taken and released here.
  */
 void
 pri_to_rtp(struct thread *td, struct rtprio *rtp)
 {
 
 	thread_lock(td);
 	switch (PRI_BASE(td->td_pri_class)) {
 	case PRI_REALTIME:
 		rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME;
 		break;
 	case PRI_TIMESHARE:
 		rtp->prio = td->td_base_user_pri - PRI_MIN_TIMESHARE;
 		break;
 	case PRI_IDLE:
 		rtp->prio = td->td_base_user_pri - PRI_MIN_IDLE;
 		break;
 	default:
 		/*
 		 * Callers pass uninitialized stack storage and copy the
 		 * result out to userland, so never leave prio unset.
 		 */
 		rtp->prio = 0;
 		break;
 	}
 	rtp->type = td->td_pri_class;
 	thread_unlock(td);
 }
 
 #if defined(COMPAT_43)
 #ifndef _SYS_SYSPROTO_H_
 struct osetrlimit_args {
 	u_int	which;
 	struct	orlimit *rlp;
 };
 #endif
 int
 osetrlimit(struct thread *td, register struct osetrlimit_args *uap)
 {
 	struct orlimit olim;
 	struct rlimit lim;
 	int error;
 
 	if ((error = copyin(uap->rlp, &olim, sizeof(struct orlimit))))
 		return (error);
 	lim.rlim_cur = olim.rlim_cur;
 	lim.rlim_max = olim.rlim_max;
 	error = kern_setrlimit(td, uap->which, &lim);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct ogetrlimit_args {
 	u_int	which;
 	struct	orlimit *rlp;
 };
 #endif
 /*
  * Old-style getrlimit: fetch the limit and copy it out as a struct
  * orlimit, saturating values above 0x7fffffff.  Returns EINVAL for an
  * out-of-range resource number.
  */
 int
 ogetrlimit(struct thread *td, register struct ogetrlimit_args *uap)
 {
 	struct orlimit olim;
 	struct rlimit rl;
 
 	if (uap->which >= RLIM_NLIMITS)
 		return (EINVAL);
 	lim_rlimit(td, uap->which, &rl);
 
 	/*
 	 * XXX it would be more correct to translate only RLIM_INFINITY to
 	 * the old RLIM_INFINITY and to fail with EOVERFLOW for any other
 	 * value that does not fit.  Most 64->32 and 32->16 conversions,
 	 * including not unimportant ones of uids, are even more broken
 	 * than this (they blindly truncate).  It is not done correctly
 	 * here because there is little experience with EOVERFLOW yet.
 	 * Elsewhere, getuid() can't fail...
 	 */
 	if (rl.rlim_cur > 0x7fffffff)
 		olim.rlim_cur = 0x7fffffff;
 	else
 		olim.rlim_cur = rl.rlim_cur;
 	if (rl.rlim_max > 0x7fffffff)
 		olim.rlim_max = 0x7fffffff;
 	else
 		olim.rlim_max = rl.rlim_max;
 
 	return (copyout(&olim, uap->rlp, sizeof(olim)));
 }
 #endif /* COMPAT_43 */
 
 #ifndef _SYS_SYSPROTO_H_
 struct __setrlimit_args {
 	u_int	which;
 	struct	rlimit *rlp;
 };
 #endif
 int
 sys_setrlimit(struct thread *td, register struct __setrlimit_args *uap)
 {
 	struct rlimit alim;
 	int error;
 
 	if ((error = copyin(uap->rlp, &alim, sizeof(struct rlimit))))
 		return (error);
 	error = kern_setrlimit(td, uap->which, &alim);
 	return (error);
 }
 
 /*
  * Callout handler that polices the CPU time limit of the process passed
  * as arg.  The process lock must be held on entry (asserted below).
  *
  * When p_cpulimit is RLIM_INFINITY nothing is done and the callout is
  * not re-armed.  Otherwise the runtime of every thread is folded into
  * the process totals and compared against the limits: reaching the hard
  * limit kills the process, exceeding only the soft threshold delivers
  * SIGXCPU.  Unless P_WEXIT is set the handler then re-arms itself.
  */
 static void
 lim_cb(void *arg)
 {
 	struct rlimit rlim;
 	struct thread *td;
 	struct proc *p;
 
 	p = arg;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * Check if the process exceeds its cpu resource allocation.  If
 	 * it reaches the max, arrange to kill the process in ast().
 	 */
 	if (p->p_cpulimit == RLIM_INFINITY)
 		return;
 	/* Bring p_rux up to date with what each thread has accumulated. */
 	PROC_STATLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td) {
 		ruxagg(p, td);
 	}
 	PROC_STATUNLOCK(p);
 	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
 		lim_rlimit_proc(p, RLIMIT_CPU, &rlim);
 		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
 			killproc(p, "exceeded maximum CPU limit");
 		} else {
 			/*
 			 * Raise the threshold (but only while it is below
 			 * the hard limit) so that the next SIGXCPU is sent
 			 * after more CPU time has been consumed.
 			 */
 			if (p->p_cpulimit < rlim.rlim_max)
 				p->p_cpulimit += 5;
 			kern_psignal(p, SIGXCPU);
 		}
 	}
 	/* Re-arm for the next check unless the process is exiting. */
 	if ((p->p_flag & P_WEXIT) == 0)
 		callout_reset_sbt(&p->p_limco, SBT_1S, 0,
 		    lim_cb, p, C_PREL(1));
 }
 
 /*
  * Set resource limit 'which' of the process that owns td; a thin
  * front end to kern_proc_setrlimit().
  */
 int
 kern_setrlimit(struct thread *td, u_int which, struct rlimit *limp)
 {
 	struct proc *self;
 
 	self = td->td_proc;
 	return (kern_proc_setrlimit(td, self, which, limp));
 }
 
 /*
  * Set resource limit 'which' of process p to *limp on behalf of thread td.
  *
  * Returns EINVAL if 'which' is out of range, the error from priv_check()
  * if the request exceeds the current hard limit and the privilege check
  * fails, and 0 otherwise.
  *
  * *limp is modified in place: negative values become RLIM_INFINITY, the
  * soft limit is capped at the hard limit, and several resources are
  * clamped to system-wide maxima.  The process receives a freshly
  * allocated plimit holding the updated copy and the reference to the
  * previous one is dropped.  For RLIMIT_STACK the protection of the
  * affected part of the stack is adjusted to match the new soft limit.
  */
 int
 kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
     struct rlimit *limp)
 {
 	struct plimit *newlim, *oldlim;
 	register struct rlimit *alimp;
 	struct rlimit oldssiz;
 	int error;
 
 	if (which >= RLIM_NLIMITS)
 		return (EINVAL);
 
 	/*
 	 * Preserve historical bugs by treating negative limits as unsigned.
 	 */
 	if (limp->rlim_cur < 0)
 		limp->rlim_cur = RLIM_INFINITY;
 	if (limp->rlim_max < 0)
 		limp->rlim_max = RLIM_INFINITY;
 
 	oldssiz.rlim_cur = 0;
 	/* Allocate the replacement before taking the process lock. */
 	newlim = lim_alloc();
 	PROC_LOCK(p);
 	oldlim = p->p_limit;
 	alimp = &oldlim->pl_rlimit[which];
 	/* Going above the current hard limit requires privilege. */
 	if (limp->rlim_cur > alimp->rlim_max ||
 	    limp->rlim_max > alimp->rlim_max)
 		if ((error = priv_check(td, PRIV_PROC_SETRLIMIT))) {
 			PROC_UNLOCK(p);
 			lim_free(newlim);
 			return (error);
 		}
 	if (limp->rlim_cur > limp->rlim_max)
 		limp->rlim_cur = limp->rlim_max;
 	lim_copy(newlim, oldlim);
 	/* From here on alimp refers to the entry in the new copy. */
 	alimp = &newlim->pl_rlimit[which];
 
 	switch (which) {
 
 	case RLIMIT_CPU:
 		/*
 		 * Start the periodic CPU limit check when moving from an
 		 * unlimited to a finite soft limit.
 		 */
 		if (limp->rlim_cur != RLIM_INFINITY &&
 		    p->p_cpulimit == RLIM_INFINITY)
 			callout_reset_sbt(&p->p_limco, SBT_1S, 0,
 			    lim_cb, p, C_PREL(1));
 		p->p_cpulimit = limp->rlim_cur;
 		break;
 	case RLIMIT_DATA:
 		if (limp->rlim_cur > maxdsiz)
 			limp->rlim_cur = maxdsiz;
 		if (limp->rlim_max > maxdsiz)
 			limp->rlim_max = maxdsiz;
 		break;
 
 	case RLIMIT_STACK:
 		if (limp->rlim_cur > maxssiz)
 			limp->rlim_cur = maxssiz;
 		if (limp->rlim_max > maxssiz)
 			limp->rlim_max = maxssiz;
 		/*
 		 * Remember the previous stack limit (alimp still holds the
 		 * copied old value) for the protection change below.
 		 */
 		oldssiz = *alimp;
 		if (p->p_sysent->sv_fixlimit != NULL)
 			p->p_sysent->sv_fixlimit(&oldssiz,
 			    RLIMIT_STACK);
 		break;
 
 	case RLIMIT_NOFILE:
 		if (limp->rlim_cur > maxfilesperproc)
 			limp->rlim_cur = maxfilesperproc;
 		if (limp->rlim_max > maxfilesperproc)
 			limp->rlim_max = maxfilesperproc;
 		break;
 
 	case RLIMIT_NPROC:
 		if (limp->rlim_cur > maxprocperuid)
 			limp->rlim_cur = maxprocperuid;
 		if (limp->rlim_max > maxprocperuid)
 			limp->rlim_max = maxprocperuid;
 		if (limp->rlim_cur < 1)
 			limp->rlim_cur = 1;
 		if (limp->rlim_max < 1)
 			limp->rlim_max = 1;
 		break;
 	}
 	if (p->p_sysent->sv_fixlimit != NULL)
 		p->p_sysent->sv_fixlimit(limp, which);
 	/* Publish the new limits and release the old structure. */
 	*alimp = *limp;
 	p->p_limit = newlim;
 	PROC_UPDATE_COW(p);
 	PROC_UNLOCK(p);
 	lim_free(oldlim);
 
 	if (which == RLIMIT_STACK &&
 	    /*
 	     * Skip calls from exec_new_vmspace(), done when stack is
 	     * not mapped yet.
 	     */
 	    (td != curthread || (p->p_flag & P_INEXEC) == 0)) {
 		/*
 		 * Stack is allocated to the max at exec time with only
 		 * "rlim_cur" bytes accessible.  If stack limit is going
 		 * up make more accessible, if going down make inaccessible.
 		 */
 		if (limp->rlim_cur != oldssiz.rlim_cur) {
 			vm_offset_t addr;
 			vm_size_t size;
 			vm_prot_t prot;
 
 			if (limp->rlim_cur > oldssiz.rlim_cur) {
 				prot = p->p_sysent->sv_stackprot;
 				size = limp->rlim_cur - oldssiz.rlim_cur;
 				addr = p->p_sysent->sv_usrstack -
 				    limp->rlim_cur;
 			} else {
 				prot = VM_PROT_NONE;
 				size = oldssiz.rlim_cur - limp->rlim_cur;
 				addr = p->p_sysent->sv_usrstack -
 				    oldssiz.rlim_cur;
 			}
 			addr = trunc_page(addr);
 			size = round_page(size);
 			/* The result is deliberately ignored. */
 			(void)vm_map_protect(&p->p_vmspace->vm_map,
 			    addr, addr + size, prot, FALSE);
 		}
 	}
 
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct __getrlimit_args {
 	u_int	which;
 	struct	rlimit *rlp;
 };
 #endif
 /* ARGSUSED */
 int
 sys_getrlimit(struct thread *td, register struct __getrlimit_args *uap)
 {
 	struct rlimit rlim;
 	int error;
 
 	if (uap->which >= RLIM_NLIMITS)
 		return (EINVAL);
 	lim_rlimit(td, uap->which, &rlim);
 	error = copyout(&rlim, uap->rlp, sizeof(struct rlimit));
 	return (error);
 }
 
 /*
  * Transform the running time and tick information for children of proc p
  * (kept in p->p_crux) into user and system time usage, stored in *up and
  * *sp respectively.  The process lock must be held.
  */
 void
 calccru(struct proc *p, struct timeval *up, struct timeval *sp)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	calcru1(p, &p->p_crux, up, sp);
 }
 
 /*
  * Transform the running time and tick information in proc p into user
  * and system time usage.  If appropriate, include the current time slice
  * on this CPU.
  *
  * The results are stored in *up (user) and *sp (system).  Both the
  * process lock and the process statistics lock must be held.
  */
 void
 calcru(struct proc *p, struct timeval *up, struct timeval *sp)
 {
 	struct thread *td;
 	uint64_t runtime, u;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_STATLOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * If we are getting stats for the current process, then add in the
 	 * stats that this thread has accumulated in its current time slice.
 	 * We reset the thread and CPU state as if we had performed a context
 	 * switch right here.
 	 */
 	td = curthread;
 	if (td->td_proc == p) {
 		u = cpu_ticks();
 		runtime = u - PCPU_GET(switchtime);
 		td->td_runtime += runtime;
 		td->td_incruntime += runtime;
 		PCPU_SET(switchtime, u);
 	}
 	/* Make sure the per-thread stats are current. */
 	FOREACH_THREAD_IN_PROC(p, td) {
 		/* Threads with no new runtime are not aggregated. */
 		if (td->td_incruntime == 0)
 			continue;
 		ruxagg(p, td);
 	}
 	calcru1(p, &p->p_rux, up, sp);
 }
 
 /*
  * Collect resource usage for a single thread.
  *
  * Copies td's rusage into *ru and fills in ru_utime and ru_stime from
  * the thread's own rusage_ext.  The caller must hold the statistics
  * lock of td's process and the thread lock, as asserted below.
  */
 void
 rufetchtd(struct thread *td, struct rusage *ru)
 {
 	struct proc *p;
 	uint64_t runtime, u;
 
 	p = td->td_proc;
 	PROC_STATLOCK_ASSERT(p, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	/*
 	 * If we are getting stats for the current thread, then add in the
 	 * stats that this thread has accumulated in its current time slice.
 	 * We reset the thread and CPU state as if we had performed a context
 	 * switch right here.
 	 */
 	if (td == curthread) {
 		u = cpu_ticks();
 		runtime = u - PCPU_GET(switchtime);
 		td->td_runtime += runtime;
 		td->td_incruntime += runtime;
 		PCPU_SET(switchtime, u);
 	}
 	/* Fold the pending per-thread counters into the totals first. */
 	ruxagg(p, td);
 	*ru = td->td_ru;
 	calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
 }
 
 /*
  * Split the total runtime recorded in *ruxp into user and system time,
  * in proportion to the user and system tick counts, and store the result
  * in *up and *sp.  Interrupt ticks count towards the total but are not
  * reported separately.
  *
  * The computed microsecond values are cached in rux_uu, rux_su and
  * rux_tu; the cached values from the previous call are used to keep the
  * reported times from moving backwards.  p is used only for the
  * diagnostic messages.
  */
 static void
 calcru1(struct proc *p, struct rusage_ext *ruxp, struct timeval *up,
     struct timeval *sp)
 {
 	/* {user, system, interrupt, total} {ticks, usec}: */
 	uint64_t ut, uu, st, su, it, tt, tu;
 
 	ut = ruxp->rux_uticks;
 	st = ruxp->rux_sticks;
 	it = ruxp->rux_iticks;
 	tt = ut + st + it;
 	if (tt == 0) {
 		/* Avoid divide by zero */
 		st = 1;
 		tt = 1;
 	}
 	tu = cputick2usec(ruxp->rux_runtime);
 	if ((int64_t)tu < 0) {
 		/* XXX: this should be an assert /phk */
 		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
 		    (intmax_t)tu, p->p_pid, p->p_comm);
 		/* Fall back to the total computed by the previous call. */
 		tu = ruxp->rux_tu;
 	}
 
 	if (tu >= ruxp->rux_tu) {
 		/*
 		 * The normal case, time increased.
 		 * Enforce monotonicity of bucketed numbers.
 		 */
 		uu = (tu * ut) / tt;
 		if (uu < ruxp->rux_uu)
 			uu = ruxp->rux_uu;
 		su = (tu * st) / tt;
 		if (su < ruxp->rux_su)
 			su = ruxp->rux_su;
 	} else if (tu + 3 > ruxp->rux_tu || 101 * tu > 100 * ruxp->rux_tu) {
 		/*
 		 * When we calibrate the cputicker, it is not uncommon to
 		 * see the presumably fixed frequency increase slightly over
 		 * time as a result of thermal stabilization and NTP
 		 * discipline (of the reference clock).  We therefore ignore
 		 * a bit of backwards slop because we expect to catch up
 		 * shortly.  We use a 3 microsecond limit to catch low
 		 * counts and a 1% limit for high counts.
 		 */
 		uu = ruxp->rux_uu;
 		su = ruxp->rux_su;
 		tu = ruxp->rux_tu;
 	} else { /* tu < ruxp->rux_tu */
 		/*
 		 * What happened here was likely that a laptop, which ran at
 		 * a reduced clock frequency at boot, kicked into high gear.
 		 * The wisdom of spamming this message in that case is
 		 * dubious, but it might also be indicative of something
 		 * serious, so lets keep it and hope laptops can be made
 		 * more truthful about their CPU speed via ACPI.
 		 */
 		printf("calcru: runtime went backwards from %ju usec "
 		    "to %ju usec for pid %d (%s)\n",
 		    (uintmax_t)ruxp->rux_tu, (uintmax_t)tu,
 		    p->p_pid, p->p_comm);
 		uu = (tu * ut) / tt;
 		su = (tu * st) / tt;
 	}
 
 	/* Cache the results for the monotonicity checks of the next call. */
 	ruxp->rux_uu = uu;
 	ruxp->rux_su = su;
 	ruxp->rux_tu = tu;
 
 	/* Convert microseconds to seconds plus microseconds. */
 	up->tv_sec = uu / 1000000;
 	up->tv_usec = uu % 1000000;
 	sp->tv_sec = su / 1000000;
 	sp->tv_usec = su % 1000000;
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getrusage_args {
 	int	who;
 	struct	rusage *rusage;
 };
 #endif
 int
 sys_getrusage(register struct thread *td, register struct getrusage_args *uap)
 {
 	struct rusage ru;
 	int error;
 
 	error = kern_getrusage(td, uap->who, &ru);
 	if (error == 0)
 		error = copyout(&ru, uap->rusage, sizeof(struct rusage));
 	return (error);
 }
 
 /*
  * Fill *rup with resource usage for the process of td (RUSAGE_SELF),
  * its children (RUSAGE_CHILDREN) or td itself (RUSAGE_THREAD).  Any
  * other value of 'who' yields EINVAL.  The process lock is held for
  * the duration of the lookup.
  */
 int
 kern_getrusage(struct thread *td, int who, struct rusage *rup)
 {
 	struct proc *p;
 	int error;
 
 	p = td->td_proc;
 	error = 0;
 	PROC_LOCK(p);
 	if (who == RUSAGE_SELF) {
 		rufetchcalc(p, rup, &rup->ru_utime, &rup->ru_stime);
 	} else if (who == RUSAGE_CHILDREN) {
 		*rup = p->p_stats->p_cru;
 		calccru(p, &rup->ru_utime, &rup->ru_stime);
 	} else if (who == RUSAGE_THREAD) {
 		PROC_STATLOCK(p);
 		thread_lock(td);
 		rufetchtd(td, rup);
 		thread_unlock(td);
 		PROC_STATUNLOCK(p);
 	} else {
 		error = EINVAL;
 	}
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 /*
  * Accumulate the usage in *ru2 into *ru: ru_maxrss becomes the larger
  * of the two values and every long field from ru_first through ru_last
  * (inclusive) is summed.
  */
 void
 rucollect(struct rusage *ru, struct rusage *ru2)
 {
 	long *dst, *src;
 	int idx, nfields;
 
 	if (ru2->ru_maxrss > ru->ru_maxrss)
 		ru->ru_maxrss = ru2->ru_maxrss;
 	dst = &ru->ru_first;
 	src = &ru2->ru_first;
 	nfields = &ru->ru_last - &ru->ru_first;
 	for (idx = 0; idx <= nfields; idx++)
 		dst[idx] += src[idx];
 }
 
 /*
  * Add the usage described by (ru2, rux2) to (ru, rux): the rusage part
  * is merged with rucollect() and every rusage_ext counter is summed.
  */
 void
 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
     struct rusage_ext *rux2)
 {
 
 	rucollect(ru, ru2);
 
 	/* Raw runtime and tick counters. */
 	rux->rux_runtime += rux2->rux_runtime;
 	rux->rux_uticks += rux2->rux_uticks;
 	rux->rux_sticks += rux2->rux_sticks;
 	rux->rux_iticks += rux2->rux_iticks;
 
 	/* Cached microsecond values. */
 	rux->rux_uu += rux2->rux_uu;
 	rux->rux_su += rux2->rux_su;
 	rux->rux_tu += rux2->rux_tu;
 }
 
 /*
  * Add the pending runtime and tick counts of thread td to the rusage_ext
  * pointed to by rux.  ruxagg() calls this for both the process-wide and
  * the per-thread totals.  The thread's pending counters are not cleared
  * here.  The thread lock and the statistics lock of td's process must
  * be held.
  */
 static void
 ruxagg_locked(struct rusage_ext *rux, struct thread *td)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
 	rux->rux_runtime += td->td_incruntime;
 	rux->rux_uticks += td->td_uticks;
 	rux->rux_sticks += td->td_sticks;
 	rux->rux_iticks += td->td_iticks;
 }
 
 /*
  * Fold the pending runtime and tick counts of thread td into both the
  * totals of process p and td's own totals, then reset the pending
  * counters.  The thread lock is taken here; ruxagg_locked() additionally
  * asserts that the process statistics lock is held.
  */
 void
 ruxagg(struct proc *p, struct thread *td)
 {
 
 	thread_lock(td);
 	ruxagg_locked(&p->p_rux, td);
 	ruxagg_locked(&td->td_rux, td);
 	/* Everything has been accounted for; start counting afresh. */
 	td->td_incruntime = 0;
 	td->td_uticks = 0;
 	td->td_iticks = 0;
 	td->td_sticks = 0;
 	thread_unlock(td);
 }
 
 /*
  * Bring the rusage_ext totals of proc p up to date and build an
  * aggregate rusage in *ru: the process-wide p_ru plus the rusage of
  * every thread currently in the process.  The process statistics lock
  * must be held.
  */
 void
 rufetch(struct proc *p, struct rusage *ru)
 {
 	struct thread *td;
 
 	PROC_STATLOCK_ASSERT(p, MA_OWNED);
 
 	*ru = p->p_ru;
 	if (p->p_numthreads <= 0)
 		return;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		ruxagg(p, td);
 		rucollect(ru, &td->td_ru);
 	}
 }
 
 /*
  * Atomically perform a rufetch and a calcru together.
  * Consumers can safely assume that calcru is executed only once
  * rufetch has completed: both run under a single hold of the process
  * statistics lock, which is acquired and released here.  calcru()
  * additionally asserts that the process lock is held.
  */
 void
 rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
     struct timeval *sp)
 {
 
 	PROC_STATLOCK(p);
 	rufetch(p, ru);
 	calcru(p, up, sp);
 	PROC_STATUNLOCK(p);
 }
 
 /*
  * Allocate a new resource limits structure and initialize its
  * reference count to 1.  The allocation uses M_WAITOK and is not
  * zeroed, so the pl_rlimit array is left uninitialized; callers are
  * expected to fill it in (for example with lim_copy()).
  */
 struct plimit *
 lim_alloc(void)
 {
 	struct plimit *limp;
 
 	limp = malloc(sizeof(*limp), M_PLIMIT, M_WAITOK);
 	refcount_init(&limp->pl_refcnt, 1);
 	return (limp);
 }
 
 /*
  * Acquire an additional reference on limp and return limp, so that the
  * call can be used directly in an assignment.
  */
 struct plimit *
 lim_hold(struct plimit *limp)
 {
 
 	refcount_acquire(&limp->pl_refcnt);
 	return (limp);
 }
 
 /*
  * Return non-zero if limp currently has more than one reference.
  */
 static __inline int
 lim_shared(struct plimit *limp)
 {
 
 	return (limp->pl_refcnt > 1);
 }
 
 /*
  * Set up the resource limit state of p2 from p1: p2 takes another
  * reference on p1's limit structure instead of copying it, p2's limit
  * callout is initialized to run under p2's process mutex, and the
  * CPU limit callout is started if p1 has a finite p_cpulimit.  Both
  * process locks must be held.
  */
 void
 lim_fork(struct proc *p1, struct proc *p2)
 {
 
 	PROC_LOCK_ASSERT(p1, MA_OWNED);
 	PROC_LOCK_ASSERT(p2, MA_OWNED);
 
 	p2->p_limit = lim_hold(p1->p_limit);
 	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
 	if (p1->p_cpulimit != RLIM_INFINITY)
 		callout_reset_sbt(&p2->p_limco, SBT_1S, 0,
 		    lim_cb, p2, C_PREL(1));
 }
 
 /*
  * Drop one reference on limp and release its memory when that was the
  * last reference.
  */
 void
 lim_free(struct plimit *limp)
 {
 
 	if (refcount_release(&limp->pl_refcnt) == 0)
 		return;
 	free(limp, M_PLIMIT);
 }
 
 /*
  * Make a copy of the plimit structure.
  * We share these structures copy-on-write after fork.
  *
  * Only the pl_rlimit array is copied from src to dst; dst's reference
  * count is left alone.  dst must not be shared (checked by the KASSERT).
  */
 void
 lim_copy(struct plimit *dst, struct plimit *src)
 {
 
 	KASSERT(!lim_shared(dst), ("lim_copy to shared limit"));
 	bcopy(src->pl_rlimit, dst->pl_rlimit, sizeof(src->pl_rlimit));
 }
 
 /*
  * Return the hard limit for a particular system resource.  The
  * which parameter specifies the index into the rlimit array.
  */
 rlim_t
 lim_max(struct thread *td, int which)
 {
 	struct rlimit rl;
 
 	lim_rlimit(td, which, &rl);
 	return (rl.rlim_max);
 }
 
 rlim_t
 lim_max_proc(struct proc *p, int which)
 {
 	struct rlimit rl;
 
 	lim_rlimit_proc(p, which, &rl);
 	return (rl.rlim_max);
 }
 
 /*
  * Return the current (soft) limit for a particular system resource.
  * The which parameter which specifies the index into the rlimit array
  */
 rlim_t
 lim_cur(struct thread *td, int which)
 {
 	struct rlimit rl;
 
 	lim_rlimit(td, which, &rl);
 	return (rl.rlim_cur);
 }
 
 rlim_t
 lim_cur_proc(struct proc *p, int which)
 {
 	struct rlimit rl;
 
 	lim_rlimit_proc(p, which, &rl);
 	return (rl.rlim_cur);
 }
 
 /*
  * Store in *rlp a copy of the rlimit entry selected by 'which', taken
  * from the limit structure referenced by thread td, and let the
  * process's sysentvec adjust it when it provides an sv_fixlimit hook.
  * td must be the current thread and 'which' must be a valid index.
  */
 void
 lim_rlimit(struct thread *td, int which, struct rlimit *rlp)
 {
 	struct proc *p;
 
 	MPASS(td == curthread);
 	KASSERT(which >= 0 && which < RLIM_NLIMITS,
 	    ("request for invalid resource limit"));
 	p = td->td_proc;
 	*rlp = td->td_limit->pl_rlimit[which];
 	if (p->p_sysent->sv_fixlimit == NULL)
 		return;
 	p->p_sysent->sv_fixlimit(rlp, which);
 }
 
 /*
  * Store in *rlp a copy of the rlimit entry selected by 'which', taken
  * from the limit structure of process p, and let the process's
  * sysentvec adjust it when it provides an sv_fixlimit hook.  The
  * process lock must be held and 'which' must be a valid index.
  */
 void
 lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(which >= 0 && which < RLIM_NLIMITS,
 	    ("request for invalid resource limit"));
 	*rlp = p->p_limit->pl_rlimit[which];
 	if (p->p_sysent->sv_fixlimit == NULL)
 		return;
 	p->p_sysent->sv_fixlimit(rlp, which);
 }
 
 /*
  * Create the uidinfo hash table, sized from maxproc / 16, and
  * initialize the lock that protects it.
  */
 void
 uihashinit(void)
 {
 
 	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
 	rw_init(&uihashtbl_lock, "uidinfo hash");
 }
 
 /*
  * Look up the uidinfo struct for the given uid.
  * uihashtbl_lock must be held (read or write).
  * On a hit the entry is returned with its reference count increased;
  * NULL is returned when no entry exists for the uid.
  */
 static struct uidinfo *
 uilookup(uid_t uid)
 {
 	struct uihashhead *bucket;
 	struct uidinfo *uip;
 
 	rw_assert(&uihashtbl_lock, RA_LOCKED);
 	bucket = UIHASH(uid);
 	LIST_FOREACH(uip, bucket, ui_hash) {
 		if (uip->ui_uid != uid)
 			continue;
 		uihold(uip);
 		return (uip);
 	}
 
 	return (NULL);
 }
 
 /*
  * Find or allocate a struct uidinfo for a particular uid.
  * Returns with uidinfo struct referenced.
  * uifree() should be called on a struct uidinfo when released.
  *
  * The lookup is first attempted under the read lock.  On a miss a new
  * entry is fully initialized with no lock held and then inserted under
  * the write lock, unless an entry for the uid has appeared in the
  * meantime, in which case the new one is torn down again.
  */
 struct uidinfo *
 uifind(uid_t uid)
 {
 	struct uidinfo *new_uip, *uip;
 
 	/* Fast path: the entry already exists. */
 	rw_rlock(&uihashtbl_lock);
 	uip = uilookup(uid);
 	rw_runlock(&uihashtbl_lock);
 	if (uip != NULL)
 		return (uip);
 
 	/* Build a candidate entry holding the reference we will return. */
 	new_uip = malloc(sizeof(*new_uip), M_UIDINFO, M_WAITOK | M_ZERO);
 	racct_create(&new_uip->ui_racct);
 	refcount_init(&new_uip->ui_ref, 1);
 	new_uip->ui_uid = uid;
 	mtx_init(&new_uip->ui_vmsize_mtx, "ui_vmsize", NULL, MTX_DEF);
 
 	rw_wlock(&uihashtbl_lock);
 	/*
 	 * There's a chance someone created our uidinfo while we
 	 * were in malloc and not holding the lock, so we have to
 	 * make sure we don't insert a duplicate uidinfo.
 	 */
 	if ((uip = uilookup(uid)) == NULL) {
 		LIST_INSERT_HEAD(UIHASH(uid), new_uip, ui_hash);
 		rw_wunlock(&uihashtbl_lock);
 		uip = new_uip;
 	} else {
 		/* Lost the race: discard the candidate, outside the lock. */
 		rw_wunlock(&uihashtbl_lock);
 		racct_destroy(&new_uip->ui_racct);
 		mtx_destroy(&new_uip->ui_vmsize_mtx);
 		free(new_uip, M_UIDINFO);
 	}
 	return (uip);
 }
 
 /*
  * Place another refcount on a uidinfo struct.
  * Each reference is dropped again with uifree().
  */
 void
 uihold(struct uidinfo *uip)
 {
 
 	refcount_acquire(&uip->ui_ref);
 }
 
 /*-
  * Since uidinfo structs have a long lifetime, we use an
  * opportunistic refcounting scheme to avoid locking the lookup hash
  * for each release.
  *
  * If the refcount hits 0, we need to free the structure,
  * which means we need to lock the hash.
  * Optimal case:
  *   If the count is above one and can be lowered with a single
  *   compare-and-set, we are done without taking any lock.
  * Suboptimal case:
  *   Otherwise take the hash write lock and drop the reference under
  *   it; if that was the last reference, unlink and free the entry.
  */
 void
 uifree(struct uidinfo *uip)
 {
 	int old;
 
 	/* Prepare for optimal case. */
 	old = uip->ui_ref;
 	if (old > 1 && atomic_cmpset_int(&uip->ui_ref, old, old - 1))
 		return;
 
 	/* Prepare for suboptimal case. */
 	rw_wlock(&uihashtbl_lock);
 	if (refcount_release(&uip->ui_ref) == 0) {
 		/* Other references remain; nothing to free. */
 		rw_wunlock(&uihashtbl_lock);
 		return;
 	}
 
 	/* Last reference: unlink under the lock so lookups cannot find it. */
 	racct_destroy(&uip->ui_racct);
 	LIST_REMOVE(uip, ui_hash);
 	rw_wunlock(&uihashtbl_lock);
 
 	/* Report any per-uid counter that is still non-zero. */
 	if (uip->ui_sbsize != 0)
 		printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
 		    uip->ui_uid, uip->ui_sbsize);
 	if (uip->ui_proccnt != 0)
 		printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
 		    uip->ui_uid, uip->ui_proccnt);
 	/* NOTE(review): %lld is paired with an unsigned long long cast. */
 	if (uip->ui_vmsize != 0)
 		printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
 		    uip->ui_uid, (unsigned long long)uip->ui_vmsize);
 	mtx_destroy(&uip->ui_vmsize_mtx);
 	free(uip, M_UIDINFO);
 }
 
 #ifdef RACCT
 /*
  * Invoke callback on the racct of every uidinfo in the hash table,
  * passing arg2 and arg3 through unchanged.  The table is walked from
  * the last bucket down to the first with uihashtbl_lock read-held for
  * the whole traversal, so the callback runs with that lock held.
  */
 void
 ui_racct_foreach(void (*callback)(struct racct *racct,
     void *arg2, void *arg3), void *arg2, void *arg3)
 {
 	struct uidinfo *uip;
 	struct uihashhead *uih;
 	unsigned long bucket;
 
 	rw_rlock(&uihashtbl_lock);
 	/*
 	 * Count down by index rather than by decrementing a bucket
 	 * pointer until it compares below uihashtbl: forming a pointer
 	 * before the start of the array is undefined behaviour.  uihash
 	 * is used as the index of the last bucket, as before.
 	 */
 	for (bucket = uihash + 1; bucket-- > 0; ) {
 		uih = &uihashtbl[bucket];
 		LIST_FOREACH(uip, uih, ui_hash) {
 			(callback)(uip->ui_racct, arg2, arg3);
 		}
 	}
 	rw_runlock(&uihashtbl_lock);
 }
 #endif
 
 /*
  * Adjust the per-uid counter *limit of uip by diff.
  *
  * A positive diff with a non-zero max is refused, and the counter is
  * restored to its previous value, when the new value would exceed max;
  * 0 is returned in that case.  Any other adjustment (diff <= 0, or
  * max == 0) is always applied, and a diagnostic naming the counter is
  * printed if the result is negative.  Returns 1 whenever the
  * adjustment is applied.
  */
 static inline int
 chglimit(struct uidinfo *uip, long *limit, int diff, rlim_t max, const char *name)
 {
 	long new;
 
 	/* Don't allow them to exceed max, but allow subtraction. */
 	if (diff > 0 && max != 0) {
 		if (atomic_fetchadd_long(limit, (long)diff) + diff > max) {
 			atomic_subtract_long(limit, (long)diff);
 			return (0);
 		}
 	} else {
 		/*
 		 * Derive the post-update value from the atomic operation
 		 * itself; re-reading *limit afterwards could observe a
 		 * concurrent update made by another thread.
 		 */
 		new = (long)atomic_fetchadd_long(limit, (long)diff) + diff;
 		if (new < 0)
 			printf("negative %s for uid = %d\n", name, uip->ui_uid);
 	}
 	return (1);
 }
 
 /*
  * Change the count associated with number of processes
  * a given user is using.  When 'max' is 0, don't enforce a limit.
  *
  * Returns 1 if the count was changed by 'diff' and 0 if a positive
  * 'diff' was refused because it would exceed 'max'.
  */
 int
 chgproccnt(struct uidinfo *uip, int diff, rlim_t max)
 {
 
 	return (chglimit(uip, &uip->ui_proccnt, diff, max, "proccnt"));
 }
 
 /*
  * Change the total socket buffer size a user has used.
  *
  * The per-uid total is adjusted by the difference between 'to' and the
  * current *hiwat, and *hiwat is set to 'to' when the adjustment is
  * accepted.  An increase is always refused when 'max' is 0.  Returns
  * non-zero on success and 0 if the change was refused.
  */
 int
 chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, rlim_t max)
 {
 	int delta, rv;
 
 	delta = to - *hiwat;
 	if (delta > 0 && max == 0)
 		return (0);
 	rv = chglimit(uip, &uip->ui_sbsize, delta, max, "sbsize");
 	if (rv == 0)
 		return (0);
 	*hiwat = to;
 	return (rv);
 }
 
 /*
  * Change the count associated with number of pseudo-terminals
  * a given user is using.  When 'max' is 0, don't enforce a limit.
  *
  * Returns 1 if the count was changed by 'diff' and 0 if a positive
  * 'diff' was refused because it would exceed 'max'.
  */
 int
 chgptscnt(struct uidinfo *uip, int diff, rlim_t max)
 {
 
 	return (chglimit(uip, &uip->ui_ptscnt, diff, max, "ptscnt"));
 }
 
 /*
  * Change the per-uid counter ui_kqcnt by 'diff'.  When 'max' is 0, no
  * limit is enforced.
  *
  * Returns 1 if the count was changed by 'diff' and 0 if a positive
  * 'diff' was refused because it would exceed 'max'.
  */
 int
 chgkqcnt(struct uidinfo *uip, int diff, rlim_t max)
 {
 
 	return (chglimit(uip, &uip->ui_kqcnt, diff, max, "kqcnt"));
 }
-
-void
-lim_update_thread(struct thread *td)
-{
-	struct proc *p;
-	struct plimit *lim;
-
-	p = td->td_proc;
-	lim = td->td_limit;
-	PROC_LOCK_ASSERT(p, MA_OWNED);
-	td->td_limit = lim_hold(p->p_limit);
-	if (lim != NULL)
-		lim_free(lim);
-}
Index: head/sys/kern/kern_thread.c
===================================================================
--- head/sys/kern/kern_thread.c	(revision 285632)
+++ head/sys/kern/kern_thread.c	(revision 285633)
@@ -1,1198 +1,1210 @@
 /*-
  * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
  *  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice(s), this list of conditions and the following disclaimer as
  *    the first lines of this file unmodified other than the possible
  *    addition of one or more copyright notices.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice(s), this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 #include "opt_witness.h"
 #include "opt_hwpmc_hooks.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rangelock.h>
 #include <sys/resourcevar.h>
 #include <sys/sdt.h>
 #include <sys/smp.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/selinfo.h>
 #include <sys/sysent.h>
 #include <sys/turnstile.h>
 #include <sys/ktr.h>
 #include <sys/rwlock.h>
 #include <sys/umtx.h>
 #include <sys/cpuset.h>
 #ifdef	HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 #include <security/audit/audit.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 #include <vm/vm_domain.h>
 #include <sys/eventhandler.h>
 
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE(proc, , , lwp__exit);
 
 /*
  * thread related storage.
  */
 static uma_zone_t thread_zone;
 
 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
 static struct mtx zombie_lock;
 MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);
 
 static void thread_zombie(struct thread *);
 static int thread_unsuspend_one(struct thread *td, struct proc *p,
     bool boundary);
 
 #define TID_BUFFER_SIZE	1024
 
 struct mtx tid_lock;
 static struct unrhdr *tid_unrhdr;
 static lwpid_t tid_buffer[TID_BUFFER_SIZE];
 static int tid_head, tid_tail;
 static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash");
 
 struct	tidhashhead *tidhashtbl;
 u_long	tidhash;
 struct	rwlock tidhash_lock;
 
 /*
  * Allocate a thread id.  The unit-number allocator is tried first; if
  * it has nothing to give, the oldest id parked in tid_buffer by
  * tid_free() is reused.  Returns -1 when both sources are empty.
  */
 static lwpid_t
 tid_alloc(void)
 {
 	lwpid_t tid;
 
 	tid = alloc_unr(tid_unrhdr);
 	if (tid == -1) {
 		mtx_lock(&tid_lock);
 		if (tid_head != tid_tail) {
 			tid = tid_buffer[tid_head];
 			tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
 		}
 		mtx_unlock(&tid_lock);
 	}
 	return (tid);
 }
 
 /*
  * Release a thread id.  The id is parked at the tail of the tid_buffer
  * ring rather than being returned to the unit-number allocator at
  * once; when the ring is full, the oldest parked id is pushed out and
  * handed back to the allocator after the lock has been dropped.
  */
 static void
 tid_free(lwpid_t tid)
 {
 	lwpid_t evicted;
 
 	evicted = -1;
 	mtx_lock(&tid_lock);
 	if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) {
 		evicted = tid_buffer[tid_head];
 		tid_head = (tid_head + 1) % TID_BUFFER_SIZE;
 	}
 	tid_buffer[tid_tail] = tid;
 	tid_tail = (tid_tail + 1) % TID_BUFFER_SIZE;
 	mtx_unlock(&tid_lock);
 	if (evicted == -1)
 		return;
 	free_unr(tid_unrhdr, evicted);
 }
 
 /*
  * Prepare a thread for use.
  *
  * Registered as the item constructor of thread_zone in threadinit().
  * Resets the run state, assigns a fresh thread id and lets registered
  * event handlers and the audit and umtx subsystems set up their
  * per-thread state.  Always returns 0; the result of tid_alloc() is
  * not checked here.
  */
 static int
 thread_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct thread	*td;
 
 	td = (struct thread *)mem;
 	td->td_state = TDS_INACTIVE;
 	td->td_oncpu = NOCPU;
 
 	td->td_tid = tid_alloc();
 
 	/*
 	 * Note that td_critnest begins life as 1 because the thread is not
 	 * running and is thereby implicitly waiting to be on the receiving
 	 * end of a context switch.
 	 */
 	td->td_critnest = 1;
 	td->td_lend_user_pri = PRI_MAX;
 	EVENTHANDLER_INVOKE(thread_ctor, td);
 #ifdef AUDIT
 	audit_thread_alloc(td);
 #endif
 	umtx_thread_alloc(td);
 	return (0);
 }
 
 /*
  * Reclaim a thread after use.
  *
  * Registered as the item destructor of thread_zone in threadinit().
  * With INVARIANTS the thread must be in state TDS_INACTIVE, otherwise
  * the kernel panics.  Audit state and OSD are released, the thread_dtor
  * event handlers are run and the thread id is given back.
  */
 static void
 thread_dtor(void *mem, int size, void *arg)
 {
 	struct thread *td;
 
 	td = (struct thread *)mem;
 
 #ifdef INVARIANTS
 	/* Verify that this thread is in a safe state to free. */
 	switch (td->td_state) {
 	case TDS_INHIBITED:
 	case TDS_RUNNING:
 	case TDS_CAN_RUN:
 	case TDS_RUNQ:
 		/*
 		 * We must never unlink a thread that is in one of
 		 * these states, because it is currently active.
 		 */
 		panic("bad state for thread unlinking");
 		/* NOTREACHED */
 	case TDS_INACTIVE:
 		break;
 	default:
 		panic("bad thread state");
 		/* NOTREACHED */
 	}
 #endif
 #ifdef AUDIT
 	audit_thread_free(td);
 #endif
 	/* Free all OSD associated to this thread. */
 	osd_thread_exit(td);
 
 	EVENTHANDLER_INVOKE(thread_dtor, td);
 	tid_free(td->td_tid);
 }
 
 /*
  * Initialize type-stable parts of a thread (when newly created).
  *
  * Registered as the init hook of thread_zone in threadinit().
  * Allocates the sleep queue and turnstile, runs the thread_init event
  * handlers and clears the fields that are filled in later.  Always
  * returns 0.
  */
 static int
 thread_init(void *mem, int size, int flags)
 {
 	struct thread *td;
 
 	td = (struct thread *)mem;
 
 	td->td_sleepqueue = sleepq_alloc();
 	td->td_turnstile = turnstile_alloc();
 	td->td_rlqe = NULL;
 	EVENTHANDLER_INVOKE(thread_init, td);
 	/*
 	 * The scheduler's per-thread data starts right behind struct
 	 * thread; presumably the zone item size, sched_sizeof_thread(),
 	 * accounts for it.
 	 */
 	td->td_sched = (struct td_sched *)&td[1];
 	umtx_thread_init(td);
 	td->td_kstack = 0;
 	td->td_sel = NULL;
 	return (0);
 }
 
 /*
  * Tear down type-stable parts of a thread (just before being discarded).
  *
  * Registered as the fini hook of thread_zone in threadinit().  Runs the
  * thread_fini event handlers and releases the range-lock entry,
  * turnstile, sleep queue, umtx and select state of the thread.
  */
 static void
 thread_fini(void *mem, int size)
 {
 	struct thread *td;
 
 	td = (struct thread *)mem;
 	EVENTHANDLER_INVOKE(thread_fini, td);
 	rlqentry_free(td->td_rlqe);
 	turnstile_free(td->td_turnstile);
 	sleepq_free(td->td_sleepqueue);
 	umtx_thread_fini(td);
 	seltdfini(td);
 }
 
 /*
  * For a newly created process,
  * link up all the structures and its initial threads etc.
  * called from:
  * {arch}/{arch}/machdep.c   {arch}_init(), init386() etc.
  * proc_dtor() (should go away)
  * proc_init()
  *
  * Unlike proc_linkup(), this variant first initializes the thread list
  * of the process.
  */
 void
 proc_linkup0(struct proc *p, struct thread *td)
 {
 	TAILQ_INIT(&p->p_threads);	     /* all threads in proc */
 	proc_linkup(p, td);
 }
 
 /*
  * Initialize the signal queue, the pre-allocated ksiginfo and the
  * message queue notifier list of process p, reset its thread count and
  * link td in as a thread of the process.  The thread list itself is
  * not initialized here.
  */
 void
 proc_linkup(struct proc *p, struct thread *td)
 {
 
 	sigqueue_init(&p->p_sigqueue, p);
 	p->p_ksi = ksiginfo_alloc(1);
 	if (p->p_ksi != NULL) {
 		/* XXX p_ksi may be null if ksiginfo zone is not ready */
 		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
 	}
 	LIST_INIT(&p->p_mqnotifier);
 	p->p_numthreads = 0;
 	thread_link(td, p);
 }
 
 /*
  * Initialize global thread allocation resources: the thread id lock
  * and allocator, the UMA zone that thread structures come from, and
  * the thread id hash table with its lock.
  */
 void
 threadinit(void)
 {
 
 	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
 
 	/*
 	 * pid_max cannot be greater than PID_MAX.
 	 * leave one number for thread0.
 	 */
 	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
 
 	/*
 	 * Zone items are sched_sizeof_thread() bytes.  The "16 - 1"
 	 * argument is presumably the alignment mask (16-byte alignment)
 	 * -- confirm against uma_zcreate().
 	 */
 	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
 	    thread_ctor, thread_dtor, thread_init, thread_fini,
 	    16 - 1, 0);
 	tidhashtbl = hashinit(maxproc / 2, M_TIDHASH, &tidhash);
 	rw_init(&tidhash_lock, "tidhash");
 }
 
 /*
  * Place an unused thread on the zombie list.
  * Use the slpq as that must be unused by now.
  *
  * The list is protected by the zombie_lock spin mutex; the threads
  * queued here are released later by thread_reap().
  */
 static void
 thread_zombie(struct thread *td)
 {
 
 	mtx_lock_spin(&zombie_lock);
 	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
 	mtx_unlock_spin(&zombie_lock);
 }
 
 /*
  * Release a thread that has exited after cpu_throw().
  *
  * Decrements the count of exiting threads of the thread's process
  * (incremented in thread_exit()) and queues the thread on the zombie
  * list.
  */
 void
 thread_stash(struct thread *td)
 {
 	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
 	thread_zombie(td);
 }
 
 /*
  * Reap zombie resources.
  */
 void
 thread_reap(void)
 {
 	struct thread *td_first, *td_next;
 
 	/*
 	 * Don't even bother to lock if none at this instant,
 	 * we really don't care about the next instant..
 	 */
 	if (!TAILQ_EMPTY(&zombie_threads)) {
 		mtx_lock_spin(&zombie_lock);
 		td_first = TAILQ_FIRST(&zombie_threads);
 		if (td_first)
 			TAILQ_INIT(&zombie_threads);
 		mtx_unlock_spin(&zombie_lock);
 		while (td_first) {
 			td_next = TAILQ_NEXT(td_first, td_slpq);
 			thread_cow_free(td_first);
 			thread_free(td_first);
 			td_first = td_next;
 		}
 	}
 }
 
 /*
  * Allocate a thread.
  *
  * Pending zombies are reaped first.  The new thread receives a kernel
  * stack of 'pages' pages; NULL is returned, and the thread structure
  * given back to the zone, if that stack cannot be set up.
  */
 struct thread *
 thread_alloc(int pages)
 {
 	struct thread *newtd;
 
 	thread_reap(); /* check if any zombies to get */
 
 	newtd = uma_zalloc(thread_zone, M_WAITOK);
 	KASSERT(newtd->td_kstack == 0,
 	    ("thread_alloc got thread with kstack"));
 	if (vm_thread_new(newtd, pages) == 0) {
 		uma_zfree(thread_zone, newtd);
 		return (NULL);
 	}
 	cpu_thread_alloc(newtd);
 	vm_domain_policy_init(&newtd->td_vm_dom_policy);
 	return (newtd);
 }
 
 /*
  * Give an existing thread, which must not have a kernel stack yet, a
  * stack of 'pages' pages.  Returns 1 on success and 0 if the stack
  * could not be set up.
  */
 int
 thread_alloc_stack(struct thread *td, int pages)
 {
 
 	KASSERT(td->td_kstack == 0,
 	    ("thread_alloc_stack called on a thread with kstack"));
 	if (vm_thread_new(td, pages) != 0) {
 		cpu_thread_alloc(td);
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Deallocate a thread.
  *
  * Drops the cpuset reference, releases the machine-dependent state,
  * the kernel stack (if there is one) and the VM domain policy, and
  * finally returns the structure to the thread zone.
  */
 void
 thread_free(struct thread *td)
 {
 
 	lock_profile_thread_exit(td);
 	if (td->td_cpuset != NULL) {
 		cpuset_rel(td->td_cpuset);
 		td->td_cpuset = NULL;
 	}
 	cpu_thread_free(td);
 	if (td->td_kstack != 0) {
 		vm_thread_dispose(td);
 	}
 	vm_domain_policy_cleanup(&td->td_vm_dom_policy);
 	uma_zfree(thread_zone, td);
 }
 
 /*
  * Give newtd its own references to the credentials and resource limits
  * of process p and record the process's current p_cowgen value in the
  * thread.  The process lock must be held.
  */
 void
 thread_cow_get_proc(struct thread *newtd, struct proc *p)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	newtd->td_ucred = crhold(p->p_ucred);
 	newtd->td_limit = lim_hold(p->p_limit);
 	newtd->td_cowgen = p->p_cowgen;
 }
 
 /*
  * Give newtd its own references to the credentials and resource limits
  * currently referenced by thread td, and copy td's td_cowgen value.
  * No lock is asserted here.
  */
 void
 thread_cow_get(struct thread *newtd, struct thread *td)
 {
 
 	newtd->td_ucred = crhold(td->td_ucred);
 	newtd->td_limit = lim_hold(td->td_limit);
 	newtd->td_cowgen = td->td_cowgen;
 }
 
 void
 thread_cow_free(struct thread *td)
 {
 
-	if (td->td_ucred)
+	if (td->td_ucred != NULL)
 		crfree(td->td_ucred);
-	if (td->td_limit)
+	if (td->td_limit != NULL)
 		lim_free(td->td_limit);
 }
 
 /*
  * Bring the thread's credential, resource limits and generation number
  * in line with those of its process.  The comparison and the update of
  * the thread's fields are done under the proc lock.
  */
 void
 thread_cow_update(struct thread *td)
 {
 	struct proc *p;
+	struct ucred *oldcred;
+	struct plimit *oldlimit;
 
 	p = td->td_proc;
+	oldcred = NULL;
+	oldlimit = NULL;
 	PROC_LOCK(p);
-	if (td->td_ucred != p->p_ucred)
-		cred_update_thread(td);
-	if (td->td_limit != p->p_limit)
-		lim_update_thread(td);
+	if (td->td_ucred != p->p_ucred) {
+		oldcred = td->td_ucred;
+		td->td_ucred = crhold(p->p_ucred);
+	}
+	if (td->td_limit != p->p_limit) {
+		oldlimit = td->td_limit;
+		td->td_limit = lim_hold(p->p_limit);
+	}
 	td->td_cowgen = p->p_cowgen;
 	PROC_UNLOCK(p);
+	if (oldcred != NULL)
+		crfree(oldcred);
+	if (oldlimit != NULL)
+		lim_free(oldlimit);
 }
 
 /*
  * Discard the current thread and exit from its context.
  * Always called with scheduler locked.
  *
  * Because we can't free a thread while we're operating under its context,
  * push the current thread into our CPU's deadthread holder. This means
  * we needn't worry about someone else grabbing our context before we
  * do a cpu_throw().
  *
  * Both the proc lock and the proc spin lock must be held on entry (see
  * the assertions below); Giant must not be held.  This function is not
  * expected to return: it ends in sched_throw().
  */
 void
 thread_exit(void)
 {
 	uint64_t runtime, new_switchtime;
 	struct thread *td;
 	struct thread *td2;
 	struct proc *p;
 	int wakeup_swapper;
 
 	td = curthread;
 	p = td->td_proc;
 
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&Giant, MA_NOTOWNED);
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT(p != NULL, ("thread exiting without a process"));
 	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
 	    (long)p->p_pid, td->td_name);
 	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
 
 #ifdef AUDIT
 	AUDIT_SYSCALL_EXIT(0, td);
 #endif
 	/*
 	 * drop FPU & debug register state storage, or any other
 	 * architecture specific resources that
 	 * would not be on a new untouched process.
 	 */
 	cpu_thread_exit(td);	/* XXXSMP */
 
 	/*
 	 * The last thread is left attached to the process
 	 * So that the whole bundle gets recycled. Skip
 	 * all this stuff if we never had threads.
 	 * EXIT clears all sign of other threads when
 	 * it goes to single threading, so the last thread always
 	 * takes the short path.
 	 */
 	if (p->p_flag & P_HADTHREADS) {
 		if (p->p_numthreads > 1) {
 			/*
 			 * Count ourselves as an exiting thread before we
 			 * are unlinked from the process.
 			 */
 			atomic_add_int(&td->td_proc->p_exitthreads, 1);
 			thread_unlink(td);
 			/* td is unlinked, so td2 is one of the survivors. */
 			td2 = FIRST_THREAD_IN_PROC(p);
 			sched_exit_thread(td2, td);
 
 			/*
 			 * The test below is NOT true if we are the
 			 * sole exiting thread. P_STOPPED_SINGLE is unset
 			 * in exit1() after it is the only survivor.
 			 */
 			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 				/*
 				 * Every remaining thread is suspended:
 				 * resume the single-threading thread.
 				 */
 				if (p->p_numthreads == p->p_suspcount) {
 					thread_lock(p->p_singlethread);
 					wakeup_swapper = thread_unsuspend_one(
 						p->p_singlethread, p, false);
 					thread_unlock(p->p_singlethread);
 					if (wakeup_swapper)
 						kick_proc0();
 				}
 			}
 
 			/* Park ourselves in this CPU's deadthread slot. */
 			PCPU_SET(deadthread, td);
 		} else {
 			/*
 			 * The last thread is exiting.. but not through exit()
 			 */
 			panic ("thread_exit: Last thread exiting on its own");
 		}
 	} 
 #ifdef	HWPMC_HOOKS
 	/*
 	 * If this thread is part of a process that is being tracked by hwpmc(4),
 	 * inform the module of the thread's impending exit.
 	 */
 	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
 #endif
 	/*
 	 * Trade the proc lock and the proc spin lock for the stat lock and
 	 * the thread lock, which cover the accounting below.
 	 */
 	PROC_UNLOCK(p);
 	PROC_STATLOCK(p);
 	thread_lock(td);
 	PROC_SUNLOCK(p);
 
 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
 	new_switchtime = cpu_ticks();
 	runtime = new_switchtime - PCPU_GET(switchtime);
 	td->td_runtime += runtime;
 	td->td_incruntime += runtime;
 	PCPU_SET(switchtime, new_switchtime);
 	PCPU_SET(switchticks, ticks);
 	PCPU_INC(cnt.v_swtch);
 
 	/* Save our resource usage in our process. */
 	td->td_ru.ru_nvcsw++;
 	ruxagg(p, td);
 	rucollect(&p->p_ru, &td->td_ru);
 	PROC_STATUNLOCK(p);
 
 	td->td_state = TDS_INACTIVE;
 #ifdef WITNESS
 	witness_thread_exit(td);
 #endif
 	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
 	/* Not expected to return; the panic below catches it if it does. */
 	sched_throw(td);
 	panic("I'm a teapot!");
 	/* NOTREACHED */
 }
 
 /*
  * Do any thread specific cleanups that may be needed in wait()
  * called with Giant, proc and schedlock not held.
  *
  * The process must be down to exactly one thread with no other threads
  * still in the middle of exiting (both are asserted below).
  */
 void
 thread_wait(struct proc *p)
 {
 	struct thread *td;
 
 	mtx_assert(&Giant, MA_NOTOWNED);
 	KASSERT(p->p_numthreads == 1, ("multiple threads in thread_wait()"));
 	KASSERT(p->p_exitthreads == 0, ("p_exitthreads leaking"));
 	td = FIRST_THREAD_IN_PROC(p);
 	/* Lock the last thread so we spin until it exits cpu_throw(). */
 	thread_lock(td);
 	thread_unlock(td);
 	lock_profile_thread_exit(td);
 	/* Drop the cpuset hold and forget the pointer. */
 	cpuset_rel(td->td_cpuset);
 	td->td_cpuset = NULL;
 	cpu_thread_clean(td);
 	/* Release the credential and limit holds of the thread. */
 	thread_cow_free(td);
 	thread_reap();	/* check for zombie threads etc. */
 }
 
 /*
  * Attach a thread to a process: reset the per-thread state that must
  * be fresh for the new owner, then append the thread to the process
  * thread list and bump the thread count.
  */
 void
 thread_link(struct thread *td, struct proc *p)
 {
 
 	/*
 	 * XXX PROC_LOCK_ASSERT(p, MA_OWNED) can't be enabled here because
 	 * this function is called for proc0 before its lock has been
 	 * created.
 	 */
 	td->td_state = TDS_INACTIVE;
 	td->td_proc = p;
 	td->td_flags = TDF_INMEM;
 
 	/* The per-thread lists start out empty. */
 	LIST_INIT(&td->td_contested);
 	LIST_INIT(&td->td_lprof[0]);
 	LIST_INIT(&td->td_lprof[1]);
 
 	/* Fresh signal queue and sleep callout for the thread. */
 	sigqueue_init(&td->td_sigqueue, p);
 	callout_init(&td->td_slpcallout, 1);
 
 	/* Finally make the thread visible on the process list. */
 	TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
 	p->p_numthreads++;
 }
 
 /*
  * Take a thread off its process thread list and drop the thread count.
  * The proc lock must be held.  Called from thread_exit().
  */
 void
 thread_unlink(struct thread *td)
 {
 	struct proc *owner;
 
 	owner = td->td_proc;
 	PROC_LOCK_ASSERT(owner, MA_OWNED);
 
 	TAILQ_REMOVE(&owner->p_threads, td, td_plist);
 	owner->p_numthreads--;
 
 	/*
 	 * A few other things could be cleared here, but the thread's
 	 * links to the proc must NOT be.
 	 */
 }
 
 /*
  * Compute how many threads of p have not yet reached the state that
  * the given single-threading mode waits for.  Both the proc lock and
  * the proc spin lock must be held.  Panics on an unknown mode.
  */
 static int
 calc_remaining(struct proc *p, int mode)
 {
 	int left;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	switch (mode) {
 	case SINGLE_EXIT:
 		/* Every thread still linked to the process counts. */
 		left = p->p_numthreads;
 		break;
 	case SINGLE_BOUNDARY:
 		left = p->p_numthreads - p->p_boundary_count;
 		break;
 	case SINGLE_NO_EXIT:
 	case SINGLE_ALLPROC:
 		left = p->p_numthreads - p->p_suspcount;
 		break;
 	default:
 		panic("calc_remaining: wrong mode %d", mode);
 	}
 	return (left);
 }
 
 /*
  * Target value of calc_remaining() for a mode: 0 for SINGLE_ALLPROC,
  * 1 for every other mode.
  */
 static int
 remain_for_mode(int mode)
 {
 
 	if (mode == SINGLE_ALLPROC)
 		return (0);
 	return (1);
 }
 
 /*
  * Prod one inhibited thread, td2, of process p according to the
  * single-threading mode.  The proc lock, the proc spin lock and the
  * lock of td2 must all be held.
  *
  * Returns the OR of the values returned by thread_unsuspend_one() and
  * sleepq_abort() for the actions taken (0 if none was taken).
  */
 static int
 weed_inhib(int mode, struct thread *td2, struct proc *p)
 {
 	int kick;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	THREAD_LOCK_ASSERT(td2, MA_OWNED);
 
 	kick = 0;
 	switch (mode) {
 	case SINGLE_EXIT:
 		/* Resume unconditionally; abort sleeps with EINTR. */
 		if (TD_IS_SUSPENDED(td2))
 			kick |= thread_unsuspend_one(td2, p, true);
 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
 			kick |= sleepq_abort(td2, EINTR);
 		break;
 	case SINGLE_BOUNDARY:
 	case SINGLE_NO_EXIT:
 		/*
 		 * Both modes are handled alike: resume a thread that is
 		 * suspended without TDF_BOUNDARY, and abort interruptible
 		 * sleeps with ERESTART.
 		 */
 		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
 			kick |= thread_unsuspend_one(td2, p, false);
 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
 			kick |= sleepq_abort(td2, ERESTART);
 		break;
 	case SINGLE_ALLPROC:
 		/*
 		 * ALLPROC suspend tries to avoid spurious EINTR for
 		 * threads sleeping interruptibly, by suspending the
 		 * thread directly, similarly to sig_suspend_threads().
 		 * Since such sleep is not performed at the user
 		 * boundary, TDF_BOUNDARY flag is not set, and
 		 * TDF_ALLPROCSUSP is used to avoid immediate un-suspend.
 		 */
 		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
 		    TDF_ALLPROCSUSP)) == 0)
 			kick |= thread_unsuspend_one(td2, p, false);
 		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
 			if ((td2->td_flags & TDF_SBDRY) != 0) {
 				kick |= sleepq_abort(td2, ERESTART);
 			} else {
 				thread_suspend_one(td2);
 				td2->td_flags |= TDF_ALLPROCSUSP;
 			}
 		}
 		break;
 	}
 	return (kick);
 }
 
 /*
  * Enforce single-threading.
  *
  * Returns 1 if the caller must abort (another thread is waiting to
  * exit the process or similar). Process is locked!
  * Returns 0 when you are successfully the only thread running.
  * A process has successfully single threaded in the suspend mode when
  * There are no threads in user mode. Threads in the kernel must be
  * allowed to continue until they get to the user boundary. They may even
  * copy out their return values and data before suspending. They may however be
  * accelerated in reaching the user boundary as we will wake up
  * any sleeping threads that are interruptible. (PCATCH).
  *
  * The proc lock must be held on entry and is held again on return,
  * although it can be dropped and retaken in between.  'mode' is one of
  * SINGLE_EXIT, SINGLE_BOUNDARY, SINGLE_NO_EXIT or SINGLE_ALLPROC.
  */
 int
 thread_single(struct proc *p, int mode)
 {
 	struct thread *td;
 	struct thread *td2;
 	int remaining, wakeup_swapper;
 
 	td = curthread;
 	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 	    ("invalid mode %d", mode));
 	/*
 	 * If allowing non-ALLPROC singlethreading for non-curproc
 	 * callers, calc_remaining() and remain_for_mode() should be
 	 * adjusted to also account for td->td_proc != p.  For now
 	 * this is not implemented because it is not used.
 	 */
 	KASSERT((mode == SINGLE_ALLPROC && td->td_proc != p) ||
 	    (mode != SINGLE_ALLPROC && td->td_proc == p),
 	    ("mode %d proc %p curproc %p", mode, p, td->td_proc));
 	mtx_assert(&Giant, MA_NOTOWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * Nothing to do when P_HADTHREADS is clear, except for ALLPROC,
 	 * where the requester is not a thread of p.
 	 */
 	if ((p->p_flag & P_HADTHREADS) == 0 && mode != SINGLE_ALLPROC)
 		return (0);
 
 	/* Is someone already single threading? */
 	if (p->p_singlethread != NULL && p->p_singlethread != td)
 		return (1);
 
 	/* Publish the requested mode through the process flags. */
 	if (mode == SINGLE_EXIT) {
 		p->p_flag |= P_SINGLE_EXIT;
 		p->p_flag &= ~P_SINGLE_BOUNDARY;
 	} else {
 		p->p_flag &= ~P_SINGLE_EXIT;
 		if (mode == SINGLE_BOUNDARY)
 			p->p_flag |= P_SINGLE_BOUNDARY;
 		else
 			p->p_flag &= ~P_SINGLE_BOUNDARY;
 	}
 	if (mode == SINGLE_ALLPROC)
 		p->p_flag |= P_TOTAL_STOP;
 	p->p_flag |= P_STOPPED_SINGLE;
 	PROC_SLOCK(p);
 	p->p_singlethread = td;
 	remaining = calc_remaining(p, mode);
 	while (remaining != remain_for_mode(mode)) {
 		/*
 		 * If the process is stopped for more than single-threading
 		 * alone, skip the prodding and go straight to suspending.
 		 */
 		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
 			goto stopme;
 		wakeup_swapper = 0;
 		FOREACH_THREAD_IN_PROC(p, td2) {
 			if (td2 == td)
 				continue;
 			thread_lock(td2);
 			/* Ask the thread to run its suspension check. */
 			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 			if (TD_IS_INHIBITED(td2)) {
 				wakeup_swapper |= weed_inhib(mode, td2, p);
 #ifdef SMP
 			} else if (TD_IS_RUNNING(td2) && td != td2) {
 				forward_signal(td2);
 #endif
 			}
 			thread_unlock(td2);
 		}
 		if (wakeup_swapper)
 			kick_proc0();
 		remaining = calc_remaining(p, mode);
 
 		/*
 		 * Maybe we suspended some threads.. was it enough?
 		 */
 		if (remaining == remain_for_mode(mode))
 			break;
 
 stopme:
 		/*
 		 * Wake us up when everyone else has suspended.
 		 * In the mean time we suspend as well.
 		 */
 		thread_suspend_switch(td, p);
 		remaining = calc_remaining(p, mode);
 	}
 	if (mode == SINGLE_EXIT) {
 		/*
 		 * Convert the process to an unthreaded process.  The
 		 * SINGLE_EXIT is called by exit1() or execve(), in
 		 * both cases other threads must be retired.
 		 */
 		KASSERT(p->p_numthreads == 1, ("Unthreading with >1 threads"));
 		p->p_singlethread = NULL;
 		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_HADTHREADS);
 
 		/*
 		 * Wait for any remaining threads to exit cpu_throw().
 		 * Both locks are dropped around each yield.
 		 */
 		while (p->p_exitthreads != 0) {
 			PROC_SUNLOCK(p);
 			PROC_UNLOCK(p);
 			sched_relinquish(td);
 			PROC_LOCK(p);
 			PROC_SLOCK(p);
 		}
 	} else if (mode == SINGLE_BOUNDARY) {
 		/*
 		 * Wait until all suspended threads are removed from
 		 * the processors.  The thread_suspend_check()
 		 * increments p_boundary_count while it is still
 		 * running, which makes it possible for the execve()
 		 * to destroy vmspace while our other threads are
 		 * still using the address space.
 		 *
 		 * We lock the thread, which is only allowed to
 		 * succeed after context switch code finished using
 		 * the address space.
 		 */
 		FOREACH_THREAD_IN_PROC(p, td2) {
 			if (td2 == td)
 				continue;
 			thread_lock(td2);
 			KASSERT((td2->td_flags & TDF_BOUNDARY) != 0,
 			    ("td %p not on boundary", td2));
 			KASSERT(TD_IS_SUSPENDED(td2),
 			    ("td %p is not suspended", td2));
 			thread_unlock(td2);
 		}
 	}
 	PROC_SUNLOCK(p);
 	return (0);
 }
 
 bool
 thread_suspend_check_needed(void)
 {
 	struct proc *p;
 	struct thread *td;
 
 	td = curthread;
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	return (P_SHOULDSTOP(p) || ((p->p_flag & P_TRACED) != 0 &&
 	    (td->td_dbgflags & TDB_SUSPEND) != 0));
 }
 
 /*
  * Called in from locations that can safely check to see
  * whether we have to suspend or at least throttle for a
  * single-thread event (e.g. fork).
  *
  * Such locations include userret().
  * If the "return_instead" argument is non zero, the thread must be able to
  * accept 0 (caller may continue), or a non-zero error, EINTR or ERESTART
  * (caller must abort), as a result.
  *
  * The 'return_instead' argument tells the function if it may do a
  * thread_exit() or suspend, or whether the caller must abort and back
  * out instead.
  *
  * If the thread that set the single_threading request has set the
  * P_SINGLE_EXIT bit in the process flags then this call will never return
  * if 'return_instead' is false, but will exit.
  *
  * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
  *---------------+--------------------+---------------------
  *       0       | returns 0          |   returns 0 or
  *               | when ST ends       |   ERESTART immediately
  *---------------+--------------------+---------------------
  *       1       | thread exits       |   returns EINTR
  *               |                    |  immediately
  * 0 = thread_exit() or suspension ok,
  * other = return error instead of stopping the thread.
  *
  * While a full suspension is under effect, even a single threading
  * thread would be suspended if it made this call (but it shouldn't).
  * This call should only be made from places where
  * thread_exit() would be safe as that may be the outcome unless
  * return_instead is set.
  *
  * The proc lock must be held on entry and is held on return, but it is
  * dropped while the thread is suspended.
  */
 int
 thread_suspend_check(int return_instead)
 {
 	struct thread *td;
 	struct proc *p;
 	int wakeup_swapper;
 
 	td = curthread;
 	p = td->td_proc;
 	mtx_assert(&Giant, MA_NOTOWNED);
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	/* Re-evaluate after every wakeup: the reason to stop may persist. */
 	while (thread_suspend_check_needed()) {
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			KASSERT(p->p_singlethread != NULL,
 			    ("singlethread not set"));
 			/*
 			 * The only suspension in action is a
 			 * single-threading. Single threader need not stop.
 			 * XXX Should be safe to access unlocked
 			 * as it can only be set to be true by us.
 			 */
 			if (p->p_singlethread == td)
 				return (0);	/* Exempt from stopping. */
 		}
 		/* The process is exiting and the caller asked to back out. */
 		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
 			return (EINTR);
 
 		/* Should we goto user boundary if we didn't come from there? */
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
 			return (ERESTART);
 
 		/*
 		 * Ignore suspend requests if they are deferred.
 		 */
 		if ((td->td_flags & TDF_SBDRY) != 0) {
 			KASSERT(return_instead,
 			    ("TDF_SBDRY set for unsafe thread_suspend_check"));
 			return (0);
 		}
 
 		/*
 		 * If the process is waiting for us to exit,
 		 * this thread should just suicide.
 		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
 		 */
 		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) {
 			/* The proc lock is dropped around the detach work. */
 			PROC_UNLOCK(p);
 			tidhash_remove(td);
 
 			/*
 			 * Allow Linux emulation layer to do some work
 			 * before thread suicide.
 			 */
 			if (__predict_false(p->p_sysent->sv_thread_detach != NULL))
 				(p->p_sysent->sv_thread_detach)(td);
 
 			PROC_LOCK(p);
 			tdsigcleanup(td);
 			umtx_thread_exit(td);
 			PROC_SLOCK(p);
 			thread_stopped(p);
 			/* thread_exit() is not expected to return. */
 			thread_exit();
 		}
 
 		PROC_SLOCK(p);
 		thread_stopped(p);
 		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
 			/*
 			 * We are the last thread left to suspend, so
 			 * resume the single-threading thread.
 			 */
 			if (p->p_numthreads == p->p_suspcount + 1) {
 				thread_lock(p->p_singlethread);
 				wakeup_swapper = thread_unsuspend_one(
 				    p->p_singlethread, p, false);
 				thread_unlock(p->p_singlethread);
 				if (wakeup_swapper)
 					kick_proc0();
 			}
 		}
 		PROC_UNLOCK(p);
 		thread_lock(td);
 		/*
 		 * When a thread suspends, it just
 		 * gets taken off all queues.
 		 */
 		thread_suspend_one(td);
 		/* Only a suspension with return_instead == 0 is counted. */
 		if (return_instead == 0) {
 			p->p_boundary_count++;
 			td->td_flags |= TDF_BOUNDARY;
 		}
 		PROC_SUNLOCK(p);
 		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
 		thread_unlock(td);
 		PROC_LOCK(p);
 	}
 	return (0);
 }
 
 /*
  * Mark thread td suspended and switch away from it.
  *
  * Must be called with both the proc lock and the proc spin lock of p
  * held.  Both are released before the switch and taken again before
  * returning.  The suspend count of p is only bumped when td belongs
  * to p.
  */
 void
 thread_suspend_switch(struct thread *td, struct proc *p)
 {
 
 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	/*
 	 * We implement thread_suspend_one in stages here to avoid
 	 * dropping the proc lock while the thread lock is owned.
 	 */
 	if (p == td->td_proc) {
 		thread_stopped(p);
 		p->p_suspcount++;
 	}
 	PROC_UNLOCK(p);
 	thread_lock(td);
 	td->td_flags &= ~TDF_NEEDSUSPCHK;
 	TD_SET_SUSPENDED(td);
 	sched_sleep(td, 0);
 	PROC_SUNLOCK(p);
 	/* Giant is released across the switch and picked up afterwards. */
 	DROP_GIANT();
 	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
 	thread_unlock(td);
 	PICKUP_GIANT();
 	/* Restore the locking state the caller entered with. */
 	PROC_LOCK(p);
 	PROC_SLOCK(p);
 }
 
 /*
  * Put a thread into the suspended state and account for it in its
  * process.  The proc spin lock of the owning process and the thread
  * lock must be held, and the thread must not already be suspended.
  */
 void
 thread_suspend_one(struct thread *td)
 {
 	struct proc *owner;
 
 	owner = td->td_proc;
 	PROC_SLOCK_ASSERT(owner, MA_OWNED);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 
 	/* One more suspended thread in the owning process. */
 	owner->p_suspcount++;
 
 	/* The suspension-check request is consumed by suspending. */
 	td->td_flags &= ~TDF_NEEDSUSPCHK;
 	TD_SET_SUSPENDED(td);
 	sched_sleep(td, 0);
 }
 
 /*
  * Take a suspended thread out of the suspended state and make it
  * runnable.  The thread lock must be held.
  *
  * The counters of p are only adjusted when td belongs to p, in which
  * case the proc spin lock of p must be held.  When 'boundary' is true
  * and the thread has TDF_BOUNDARY set, the flag is cleared and the
  * boundary count is dropped as well.
  *
  * Returns the value returned by setrunnable().
  */
 static int
 thread_unsuspend_one(struct thread *td, struct proc *p, bool boundary)
 {
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
 	TD_CLR_SUSPENDED(td);
 	td->td_flags &= ~TDF_ALLPROCSUSP;
 
 	/* A thread of another process leaves p's accounting alone. */
 	if (td->td_proc != p)
 		return (setrunnable(td));
 
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	p->p_suspcount--;
 	if (boundary && (td->td_flags & TDF_BOUNDARY) != 0) {
 		td->td_flags &= ~TDF_BOUNDARY;
 		p->p_boundary_count--;
 	}
 	return (setrunnable(td));
 }
 
 /*
  * Allow all threads blocked by single threading to continue running.
  */
 void
 thread_unsuspend(struct proc *p)
 {
 	struct thread *td;
 	int wakeup_swapper;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
 	wakeup_swapper = 0;
 	if (!P_SHOULDSTOP(p)) {
                 FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			if (TD_IS_SUSPENDED(td)) {
 				wakeup_swapper |= thread_unsuspend_one(td, p,
 				    true);
 			}
 			thread_unlock(td);
 		}
 	} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 	    p->p_numthreads == p->p_suspcount) {
 		/*
 		 * Stopping everything also did the job for the single
 		 * threading request. Now we've downgraded to single-threaded,
 		 * let it continue.
 		 */
 		if (p->p_singlethread->td_proc == p) {
 			thread_lock(p->p_singlethread);
 			wakeup_swapper = thread_unsuspend_one(
 			    p->p_singlethread, p, false);
 			thread_unlock(p->p_singlethread);
 		}
 	}
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * End the single threading mode..
  *
  * 'mode' must be the mode the single-threading was started with; the
  * assertions below check it against the process flags.  The proc lock
  * must be held.  Except for SINGLE_ALLPROC, the caller must be the
  * single-threading thread itself.
  */
 void
 thread_single_end(struct proc *p, int mode)
 {
 	struct thread *td;
 	int wakeup_swapper;
 
 	KASSERT(mode == SINGLE_EXIT || mode == SINGLE_BOUNDARY ||
 	    mode == SINGLE_ALLPROC || mode == SINGLE_NO_EXIT,
 	    ("invalid mode %d", mode));
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	KASSERT((mode == SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) != 0) ||
 	    (mode != SINGLE_ALLPROC && (p->p_flag & P_TOTAL_STOP) == 0),
 	    ("mode %d does not match P_TOTAL_STOP", mode));
 	KASSERT(mode == SINGLE_ALLPROC || p->p_singlethread == curthread,
 	    ("thread_single_end from other thread %p %p",
 	    curthread, p->p_singlethread));
 	KASSERT(mode != SINGLE_BOUNDARY ||
 	    (p->p_flag & P_SINGLE_BOUNDARY) != 0,
 	    ("mis-matched SINGLE_BOUNDARY flags %x", p->p_flag));
 	/* Clear every flag that a single-threading request can set. */
 	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY |
 	    P_TOTAL_STOP);
 	PROC_SLOCK(p);
 	p->p_singlethread = NULL;
 	wakeup_swapper = 0;
 	/*
 	 * If there are other threads they may now run,
 	 * unless of course there is a blanket 'stop order'
 	 * on the process. The single threader must be allowed
 	 * to continue however as this is a bad place to stop.
 	 */
 	if (p->p_numthreads != remain_for_mode(mode) && !P_SHOULDSTOP(p)) {
                 FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			if (TD_IS_SUSPENDED(td)) {
 				/* Boundary accounting is undone for SINGLE_BOUNDARY only. */
 				wakeup_swapper |= thread_unsuspend_one(td, p,
 				    mode == SINGLE_BOUNDARY);
 			}
 			thread_unlock(td);
 		}
 	}
 	KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,
 	    ("inconsistent boundary count %d", p->p_boundary_count));
 	PROC_SUNLOCK(p);
 	if (wakeup_swapper)
 		kick_proc0();
 }
 
 /*
  * Look up a thread of process p by its thread id.  The proc lock must
  * be held.  Returns the matching thread, or NULL if the process has no
  * thread with that id.
  */
 struct thread *
 thread_find(struct proc *p, lwpid_t tid)
 {
 	struct thread *cand;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	FOREACH_THREAD_IN_PROC(p, cand) {
 		if (cand->td_tid == tid)
 			return (cand);
 	}
 	return (NULL);
 }
 
 /*
  * Locate a thread by number; return with proc lock held.
  *
  * 'pid' restricts the match to threads of that process; -1 accepts a
  * thread of any process.  Returns NULL, with no proc lock held, when
  * the tid is not in the hash, when it belongs to a different process
  * than the one requested, or when the owning process is still in the
  * PRS_NEW state.
  */
 struct thread *
 tdfind(lwpid_t tid, pid_t pid)
 {
 #define RUN_THRESH	16
 	struct thread *td;
 	int run = 0;	/* chain entries passed over before the match */
 
 	rw_rlock(&tidhash_lock);
 	LIST_FOREACH(td, TIDHASH(tid), td_hash) {
 		if (td->td_tid == tid) {
 			if (pid != -1 && td->td_proc->p_pid != pid) {
 				td = NULL;
 				break;
 			}
 			PROC_LOCK(td->td_proc);
 			if (td->td_proc->p_state == PRS_NEW) {
 				PROC_UNLOCK(td->td_proc);
 				td = NULL;
 				break;
 			}
 			/*
 			 * The match sat deep in its chain: if the read lock
 			 * can be upgraded, move the entry to the head of
 			 * the chain.
 			 */
 			if (run > RUN_THRESH) {
 				if (rw_try_upgrade(&tidhash_lock)) {
 					LIST_REMOVE(td, td_hash);
 					LIST_INSERT_HEAD(TIDHASH(td->td_tid),
 						td, td_hash);
 					/* The lock is write-held here. */
 					rw_wunlock(&tidhash_lock);
 					return (td);
 				}
 			}
 			break;
 		}
 		run++;
 	}
 	/* If the chain was exhausted, td is NULL at this point. */
 	rw_runlock(&tidhash_lock);
 	return (td);
 }
 
 /*
  * Enter a thread into the tid hash, at the head of the chain selected
  * by its thread id.  The hash lock is taken for writing around the
  * insertion.
  */
 void
 tidhash_add(struct thread *td)
 {
 
 	rw_wlock(&tidhash_lock);
 	LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash);
 	rw_wunlock(&tidhash_lock);
 }
 
 /*
  * Take a thread out of the tid hash.  The hash lock is taken for
  * writing around the removal.
  */
 void
 tidhash_remove(struct thread *td)
 {
 
 	rw_wlock(&tidhash_lock);
 	LIST_REMOVE(td, td_hash);
 	rw_wunlock(&tidhash_lock);
 }
Index: head/sys/sys/resourcevar.h
===================================================================
--- head/sys/sys/resourcevar.h	(revision 285632)
+++ head/sys/sys/resourcevar.h	(revision 285633)
@@ -1,165 +1,163 @@
 /*-
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)resourcevar.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef	_SYS_RESOURCEVAR_H_
 #define	_SYS_RESOURCEVAR_H_
 
 #include <sys/resource.h>
 #include <sys/queue.h>
 #ifdef _KERNEL
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #endif
 
 /*
  * Kernel per-process accounting / statistics
  * (not necessarily resident except when running).
  *
  * Locking key:
  *      b - created at fork, never changes
  *      c - locked by proc mtx
  *      k - only accessed by curthread
  *      w - locked by proc itim lock
  *	w2 - locked by proc prof lock
  *
  * The pstat_start* and pstat_end* macros each name a field, marking off
  * the range [pstat_startzero, pstat_endzero) and the range
  * [pstat_startcopy, pstat_endcopy).  NOTE(review): presumably these
  * ranges are zeroed and copied as a unit; confirm against the users of
  * the macros.
  */
 struct pstats {
 #define	pstat_startzero	p_cru
 	struct	rusage p_cru;		/* Stats for reaped children. */
 	struct	itimerval p_timer[3];	/* (w) Virtual-time timers. */
 #define	pstat_endzero	pstat_startcopy
 
 #define	pstat_startcopy	p_prof
 	struct uprof {			/* Profile arguments. */
 		caddr_t	pr_base;	/* (c + w2) Buffer base. */
 		u_long	pr_size;	/* (c + w2) Buffer size. */
 		u_long	pr_off;		/* (c + w2) PC offset. */
 		u_long	pr_scale;	/* (c + w2) PC scaling. */
 	} p_prof;
 #define	pstat_endcopy	p_start
 	struct	timeval p_start;	/* (b) Starting time. */
 };
 
 #ifdef _KERNEL
 
 /*
  * Kernel shareable process resource limits.  Because this structure
  * is moderately large but changes infrequently, it is normally
  * shared copy-on-write after forks.
  */
 struct plimit {
 	/* one rlimit entry per resource, RLIM_NLIMITS entries in all */
 	struct	rlimit pl_rlimit[RLIM_NLIMITS];
 	int	pl_refcnt;		/* number of references */
 };
 
 struct racct;
 
 /*-
  * Per uid resource consumption.  This structure is used to track
  * the total resource consumption (process count, socket buffer size,
  * etc) for the uid and impose limits.
  *
  * Locking guide:
  * (a) Constant from inception
  * (b) Lockless, updated using atomics
  * (c) Locked by global uihashtbl_lock
  * (d) Locked by the ui_vmsize_mtx
  */
 struct uidinfo {
 	LIST_ENTRY(uidinfo) ui_hash;	/* (c) hash chain of uidinfos */
 	struct mtx ui_vmsize_mtx;	/* lock for the (d) fields */
 	vm_ooffset_t ui_vmsize;		/* (d) swap reservation by uid */
 	long	ui_sbsize;		/* (b) socket buffer space consumed */
 	long	ui_proccnt;		/* (b) number of processes */
 	long	ui_ptscnt;		/* (b) number of pseudo-terminals */
 	long	ui_kqcnt;		/* (b) number of kqueues */
 	uid_t	ui_uid;			/* (a) uid */
 	u_int	ui_ref;			/* (b) reference count */
 #ifdef	RACCT
 	struct racct *ui_racct;		/* (a) resource accounting */
 #endif
 };
 
 #define	UIDINFO_VMSIZE_LOCK(ui)		mtx_lock(&((ui)->ui_vmsize_mtx))
 #define	UIDINFO_VMSIZE_UNLOCK(ui)	mtx_unlock(&((ui)->ui_vmsize_mtx))
 
 struct proc;
 struct rusage_ext;
 struct thread;
 
 void	 addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks);
 void	 addupc_task(struct thread *td, uintfptr_t pc, u_int ticks);
 void	 calccru(struct proc *p, struct timeval *up, struct timeval *sp);
 void	 calcru(struct proc *p, struct timeval *up, struct timeval *sp);
 int	 chgkqcnt(struct uidinfo *uip, int diff, rlim_t max);
 int	 chgproccnt(struct uidinfo *uip, int diff, rlim_t maxval);
 int	 chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to,
 	    rlim_t maxval);
 int	 chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval);
 int	 fuswintr(void *base);
 int	 kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
 	    struct rlimit *limp);
 struct plimit
 	*lim_alloc(void);
 void	 lim_copy(struct plimit *dst, struct plimit *src);
 rlim_t	 lim_cur(struct thread *td, int which);
 rlim_t	 lim_cur_proc(struct proc *p, int which);
 void	 lim_fork(struct proc *p1, struct proc *p2);
 void	 lim_free(struct plimit *limp);
 struct plimit
 	*lim_hold(struct plimit *limp);
 rlim_t	 lim_max(struct thread *td, int which);
 rlim_t	 lim_max_proc(struct proc *p, int which);
 void	 lim_rlimit(struct thread *td, int which, struct rlimit *rlp);
 void	 lim_rlimit_proc(struct proc *p, int which, struct rlimit *rlp);
 void	 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
 	    struct rusage_ext *rux2);
 void	 rucollect(struct rusage *ru, struct rusage *ru2);
 void	 rufetch(struct proc *p, struct rusage *ru);
 void	 rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
 	    struct timeval *sp);
 void	 rufetchtd(struct thread *td, struct rusage *ru);
 void	 ruxagg(struct proc *p, struct thread *td);
 int	 suswintr(void *base, int word);
 struct uidinfo
 	*uifind(uid_t uid);
 void	 uifree(struct uidinfo *uip);
 void	 uihashinit(void);
 void	 uihold(struct uidinfo *uip);
 #ifdef	RACCT
 void	 ui_racct_foreach(void (*callback)(struct racct *racct,
 	    void *arg2, void *arg3), void *arg2, void *arg3);
 #endif
 
-void	lim_update_thread(struct thread *td);
-
 #endif /* _KERNEL */
 #endif /* !_SYS_RESOURCEVAR_H_ */
Index: head/sys/sys/ucred.h
===================================================================
--- head/sys/sys/ucred.h	(revision 285632)
+++ head/sys/sys/ucred.h	(revision 285633)
@@ -1,119 +1,118 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ucred.h	8.4 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_UCRED_H_
 #define	_SYS_UCRED_H_
 
 #include <bsm/audit.h>
 
 struct loginclass;
 
 #define	XU_NGROUPS	16
 
 /*
  * Credentials.
  *
  * Please do not inspect cr_uid directly to determine superuserness.  The
  * priv(9) interface should be used to check for privilege.
  */
 #if defined(_KERNEL) || defined(_WANT_UCRED)
 struct ucred {
 	u_int	cr_ref;			/* reference count */
 #define	cr_startcopy cr_uid
 	uid_t	cr_uid;			/* effective user id */
 	uid_t	cr_ruid;		/* real user id */
 	uid_t	cr_svuid;		/* saved user id */
 	int	cr_ngroups;		/* number of groups */
 	gid_t	cr_rgid;		/* real group id */
 	gid_t	cr_svgid;		/* saved group id */
 	struct uidinfo	*cr_uidinfo;	/* per euid resource consumption */
 	struct uidinfo	*cr_ruidinfo;	/* per ruid resource consumption */
 	struct prison	*cr_prison;	/* jail(2) */
 	struct loginclass	*cr_loginclass; /* login class */
 	u_int		cr_flags;	/* credential flags */
 	void 		*cr_pspare2[2];	/* general use 2 */
 #define	cr_endcopy	cr_label
 	struct label	*cr_label;	/* MAC label */
 	struct auditinfo_addr	cr_audit;	/* Audit properties. */
 	gid_t	*cr_groups;		/* groups */
 	int	cr_agroups;		/* Available groups */
 	gid_t   cr_smallgroups[XU_NGROUPS];	/* storage for small groups */
 };
 #define	NOCRED	((struct ucred *)0)	/* no credential available */
 #define	FSCRED	((struct ucred *)-1)	/* filesystem credential */
 #endif /* _KERNEL || _WANT_UCRED */
 
 /*
  * Flags for cr_flags.
  */
 #define	CRED_FLAG_CAPMODE	0x00000001	/* In capability mode. */
 
 /*
  * This is the external representation of struct ucred.
  */
 struct xucred {
 	u_int	cr_version;		/* structure layout version */
 	uid_t	cr_uid;			/* effective user id */
 	short	cr_ngroups;		/* number of groups */
 	gid_t	cr_groups[XU_NGROUPS];	/* groups */
 	void	*_cr_unused1;		/* compatibility with old ucred */
 };
 #define	XUCRED_VERSION	0
 
 /* This can be used for both ucred and xucred structures. */
 #define	cr_gid cr_groups[0]
 
 #ifdef _KERNEL
 struct proc;
 struct thread;
 
 void	change_egid(struct ucred *newcred, gid_t egid);
 void	change_euid(struct ucred *newcred, struct uidinfo *euip);
 void	change_rgid(struct ucred *newcred, gid_t rgid);
 void	change_ruid(struct ucred *newcred, struct uidinfo *ruip);
 void	change_svgid(struct ucred *newcred, gid_t svgid);
 void	change_svuid(struct ucred *newcred, uid_t svuid);
 void	crcopy(struct ucred *dest, struct ucred *src);
 struct ucred	*crcopysafe(struct proc *p, struct ucred *cr);
 struct ucred	*crdup(struct ucred *cr);
-void	cred_update_thread(struct thread *td);
 void	proc_set_cred_init(struct proc *p, struct ucred *cr);
 struct ucred	*proc_set_cred(struct proc *p, struct ucred *cr);
 void	crfree(struct ucred *cr);
 struct ucred	*crget(void);
 struct ucred	*crhold(struct ucred *cr);
 void	cru2x(struct ucred *cr, struct xucred *xcr);
 void	crsetgroups(struct ucred *cr, int n, gid_t *groups);
 int	groupmember(gid_t gid, struct ucred *cred);
 #endif /* _KERNEL */
 
 #endif /* !_SYS_UCRED_H_ */