Index: head/sys/ddb/db_ps.c
===================================================================
--- head/sys/ddb/db_ps.c	(revision 350662)
+++ head/sys/ddb/db_ps.c	(revision 350663)
@@ -1,536 +1,525 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1993 The Regents of the University of California.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kstack_pages.h"
 
 #include <sys/param.h>
 #include <sys/cons.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
-#include <sys/_kstack_cache.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 
 #include <ddb/ddb.h>
 
 #define PRINT_NONE	0
 #define PRINT_ARGS	1
 
 static void	dumpthread(volatile struct proc *p, volatile struct thread *td,
 		    int all);
 static int	ps_mode;
 
 /*
  * At least one non-optional show-command must be implemented using
  * DB_SHOW_ALL_COMMAND() so that db_show_all_cmd_set gets created.
  * Here is one.
  */
 DB_SHOW_ALL_COMMAND(procs, db_procs_cmd)
 {
 	db_ps(addr, have_addr, count, modif);
 }
 
 static void
 dump_args(volatile struct proc *p)
 {
 	char *args;
 	int i, len;
 
 	if (p->p_args == NULL)
 		return;
 	args = p->p_args->ar_args;
 	len = (int)p->p_args->ar_length;
 	for (i = 0; i < len; i++) {
 		if (args[i] == '\0')
 			db_printf(" ");
 		else
 			db_printf("%c", args[i]);
 	}
 }
 
 /*
  * Layout:
  * - column counts
  * - header
  * - single-threaded process
  * - multi-threaded process
  * - thread in a MT process
  *
  *          1         2         3         4         5         6         7
  * 1234567890123456789012345678901234567890123456789012345678901234567890
  *   pid  ppid  pgrp   uid  state   wmesg   wchan       cmd
  * <pid> <ppi> <pgi> <uid>  <stat>  <wmesg> <wchan   >  <name>
  * <pid> <ppi> <pgi> <uid>  <stat>  (threaded)          <command>
  * <tid >                   <stat>  <wmesg> <wchan   >  <name>
  *
  * For machines with 64-bit pointers, we expand the wchan field 8 more
  * characters.
  */
 void
 db_ps(db_expr_t addr, bool hasaddr, db_expr_t count, char *modif)
 {
 	volatile struct proc *p, *pp;
 	volatile struct thread *td;
 	struct ucred *cred;
 	struct pgrp *pgrp;
 	char state[9];
 	int np, rflag, sflag, dflag, lflag, wflag;
 
 	ps_mode = modif[0] == 'a' ? PRINT_ARGS : PRINT_NONE;
 	np = nprocs;
 
 	if (!LIST_EMPTY(&allproc))
 		p = LIST_FIRST(&allproc);
 	else
 		p = &proc0;
 
 #ifdef __LP64__
 	db_printf("  pid  ppid  pgrp   uid  state   wmesg   wchan               cmd\n");
 #else
 	db_printf("  pid  ppid  pgrp   uid  state   wmesg   wchan       cmd\n");
 #endif
 	while (--np >= 0 && !db_pager_quit) {
 		if (p == NULL) {
 			db_printf("oops, ran out of processes early!\n");
 			break;
 		}
 		pp = p->p_pptr;
 		if (pp == NULL)
 			pp = p;
 
 		cred = p->p_ucred;
 		pgrp = p->p_pgrp;
 		db_printf("%5d %5d %5d %5d ", p->p_pid, pp->p_pid,
 		    pgrp != NULL ? pgrp->pg_id : 0,
 		    cred != NULL ? cred->cr_ruid : 0);
 
 		/* Determine our primary process state. */
 		switch (p->p_state) {
 		case PRS_NORMAL:
 			if (P_SHOULDSTOP(p))
 				state[0] = 'T';
 			else {
 				/*
 				 * One of D, L, R, S, W.  For a
 				 * multithreaded process we will use
 				 * the state of the thread with the
 				 * highest precedence.  The
 				 * precendence order from high to low
 				 * is R, L, D, S, W.  If no thread is
 				 * in a sane state we use '?' for our
 				 * primary state.
 				 */
 				rflag = sflag = dflag = lflag = wflag = 0;
 				FOREACH_THREAD_IN_PROC(p, td) {
 					if (td->td_state == TDS_RUNNING ||
 					    td->td_state == TDS_RUNQ ||
 					    td->td_state == TDS_CAN_RUN)
 						rflag++;
 					if (TD_ON_LOCK(td))
 						lflag++;
 					if (TD_IS_SLEEPING(td)) {
 						if (!(td->td_flags & TDF_SINTR))
 							dflag++;
 						else
 							sflag++;
 					}
 					if (TD_AWAITING_INTR(td))
 						wflag++;
 				}
 				if (rflag)
 					state[0] = 'R';
 				else if (lflag)
 					state[0] = 'L';
 				else if (dflag)
 					state[0] = 'D';
 				else if (sflag)
 					state[0] = 'S';
 				else if (wflag)
 					state[0] = 'W';
 				else
 					state[0] = '?';
 			}
 			break;
 		case PRS_NEW:
 			state[0] = 'N';
 			break;
 		case PRS_ZOMBIE:
 			state[0] = 'Z';
 			break;
 		default:
 			state[0] = 'U';
 			break;
 		}
 		state[1] = '\0';
 
 		/* Additional process state flags. */
 		if (!(p->p_flag & P_INMEM))
 			strlcat(state, "W", sizeof(state));
 		if (p->p_flag & P_TRACED)
 			strlcat(state, "X", sizeof(state));
 		if (p->p_flag & P_WEXIT && p->p_state != PRS_ZOMBIE)
 			strlcat(state, "E", sizeof(state));
 		if (p->p_flag & P_PPWAIT)
 			strlcat(state, "V", sizeof(state));
 		if (p->p_flag & P_SYSTEM || p->p_lock > 0)
 			strlcat(state, "L", sizeof(state));
 		if (p->p_pgrp != NULL && p->p_session != NULL &&
 		    SESS_LEADER(p))
 			strlcat(state, "s", sizeof(state));
 		/* Cheated here and didn't compare pgid's. */
 		if (p->p_flag & P_CONTROLT)
 			strlcat(state, "+", sizeof(state));
 		if (cred != NULL && jailed(cred))
 			strlcat(state, "J", sizeof(state));
 		db_printf(" %-6.6s ", state);
 		if (p->p_flag & P_HADTHREADS) {
 #ifdef __LP64__
 			db_printf(" (threaded)                  ");
 #else
 			db_printf(" (threaded)          ");
 #endif
 			if (p->p_flag & P_SYSTEM)
 				db_printf("[");
 			db_printf("%s", p->p_comm);
 			if (p->p_flag & P_SYSTEM)
 				db_printf("]");
 			if (ps_mode == PRINT_ARGS) {
 				db_printf(" ");
 				dump_args(p);
 			}
 			db_printf("\n");
 		}
 		FOREACH_THREAD_IN_PROC(p, td) {
 			dumpthread(p, td, p->p_flag & P_HADTHREADS);
 			if (db_pager_quit)
 				break;
 		}
 
 		p = LIST_NEXT(p, p_list);
 		if (p == NULL && np > 0)
 			p = LIST_FIRST(&zombproc);
 	}
 }
 
 static void
 dumpthread(volatile struct proc *p, volatile struct thread *td, int all)
 {
 	char state[9], wprefix;
 	const char *wmesg;
 	void *wchan;
 	
 	if (all) {
 		db_printf("%6d                  ", td->td_tid);
 		switch (td->td_state) {
 		case TDS_RUNNING:
 			snprintf(state, sizeof(state), "Run");
 			break;
 		case TDS_RUNQ:
 			snprintf(state, sizeof(state), "RunQ");
 			break;
 		case TDS_CAN_RUN:
 			snprintf(state, sizeof(state), "CanRun");
 			break;
 		case TDS_INACTIVE:
 			snprintf(state, sizeof(state), "Inactv");
 			break;
 		case TDS_INHIBITED:
 			state[0] = '\0';
 			if (TD_ON_LOCK(td))
 				strlcat(state, "L", sizeof(state));
 			if (TD_IS_SLEEPING(td)) {
 				if (td->td_flags & TDF_SINTR)
 					strlcat(state, "S", sizeof(state));
 				else
 					strlcat(state, "D", sizeof(state));
 			}
 			if (TD_IS_SWAPPED(td))
 				strlcat(state, "W", sizeof(state));
 			if (TD_AWAITING_INTR(td))
 				strlcat(state, "I", sizeof(state));
 			if (TD_IS_SUSPENDED(td))
 				strlcat(state, "s", sizeof(state));
 			if (state[0] != '\0')
 				break;
 		default:
 			snprintf(state, sizeof(state), "???");
 		}			
 		db_printf(" %-6.6s ", state);
 	}
 	wprefix = ' ';
 	if (TD_ON_LOCK(td)) {
 		wprefix = '*';
 		wmesg = td->td_lockname;
 		wchan = td->td_blocked;
 	} else if (TD_ON_SLEEPQ(td)) {
 		wmesg = td->td_wmesg;
 		wchan = td->td_wchan;
 	} else if (TD_IS_RUNNING(td)) {
 		snprintf(state, sizeof(state), "CPU %d", td->td_oncpu);
 		wmesg = state;
 		wchan = NULL;
 	} else {
 		wmesg = "";
 		wchan = NULL;
 	}
 	db_printf("%c%-7.7s ", wprefix, wmesg);
 	if (wchan == NULL)
 #ifdef __LP64__
 		db_printf("%18s  ", "");
 #else
 		db_printf("%10s  ", "");
 #endif
 	else
 		db_printf("%p  ", wchan);
 	if (p->p_flag & P_SYSTEM)
 		db_printf("[");
 	if (td->td_name[0] != '\0')
 		db_printf("%s", td->td_name);
 	else
 		db_printf("%s", td->td_proc->p_comm);
 	if (p->p_flag & P_SYSTEM)
 		db_printf("]");
 	if (ps_mode == PRINT_ARGS && all == 0) {
 		db_printf(" ");
 		dump_args(p);
 	}
 	db_printf("\n");
 }
 
 DB_SHOW_COMMAND(thread, db_show_thread)
 {
 	struct thread *td;
 	struct lock_object *lock;
 	u_int delta;
 	bool comma;
 
 	/* Determine which thread to examine. */
 	if (have_addr)
 		td = db_lookup_thread(addr, false);
 	else
 		td = kdb_thread;
 	lock = (struct lock_object *)td->td_lock;
 
 	db_printf("Thread %d at %p:\n", td->td_tid, td);
 	db_printf(" proc (pid %d): %p\n", td->td_proc->p_pid, td->td_proc);
 	if (td->td_name[0] != '\0')
 		db_printf(" name: %s\n", td->td_name);
 	db_printf(" pcb: %p\n", td->td_pcb);
 	db_printf(" stack: %p-%p\n", (void *)td->td_kstack,
 	    (void *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE - 1));
 	db_printf(" flags: %#x ", td->td_flags);
 	db_printf(" pflags: %#x\n", td->td_pflags);
 	db_printf(" state: ");
 	switch (td->td_state) {
 	case TDS_INACTIVE:
 		db_printf("INACTIVE\n");
 		break;
 	case TDS_CAN_RUN:
 		db_printf("CAN RUN\n");
 		break;
 	case TDS_RUNQ:
 		db_printf("RUNQ\n");
 		break;
 	case TDS_RUNNING:
 		db_printf("RUNNING (CPU %d)\n", td->td_oncpu);
 		break;
 	case TDS_INHIBITED:
 		db_printf("INHIBITED: {");
 		comma = false;
 		if (TD_IS_SLEEPING(td)) {
 			db_printf("SLEEPING");
 			comma = true;
 		}
 		if (TD_IS_SUSPENDED(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("SUSPENDED");
 			comma = true;
 		}
 		if (TD_IS_SWAPPED(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("SWAPPED");
 			comma = true;
 		}
 		if (TD_ON_LOCK(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("LOCK");
 			comma = true;
 		}
 		if (TD_AWAITING_INTR(td)) {
 			if (comma)
 				db_printf(", ");
 			db_printf("IWAIT");
 		}
 		db_printf("}\n");
 		break;
 	default:
 		db_printf("??? (%#x)\n", td->td_state);
 		break;
 	}
 	if (TD_ON_LOCK(td))
 		db_printf(" lock: %s  turnstile: %p\n", td->td_lockname,
 		    td->td_blocked);
 	if (TD_ON_SLEEPQ(td))
 		db_printf(
 	    " wmesg: %s  wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
 		    td->td_wmesg, td->td_wchan,
 		    (long)sbttobt(td->td_sleeptimo).sec,
 		    (uintmax_t)sbttobt(td->td_sleeptimo).frac,
 		    (long)sbttobt(sbinuptime()).sec,
 		    (uintmax_t)sbttobt(sbinuptime()).frac);
 	db_printf(" priority: %d\n", td->td_priority);
 	db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
 	if (td->td_swvoltick != 0) {
 		delta = ticks - td->td_swvoltick;
 		db_printf(" last voluntary switch: %u.%03u s ago\n",
 		    delta / hz, (delta % hz) * 1000 / hz);
 	}
 	if (td->td_swinvoltick != 0) {
 		delta = ticks - td->td_swinvoltick;
 		db_printf(" last involuntary switch: %u.%03u s ago\n",
 		    delta / hz, (delta % hz) * 1000 / hz);
 	}
 }
 
 DB_SHOW_COMMAND(proc, db_show_proc)
 {
 	struct thread *td;
 	struct proc *p;
 	int i;
 
 	/* Determine which process to examine. */
 	if (have_addr)
 		p = db_lookup_proc(addr);
 	else
 		p = kdb_thread->td_proc;
 
 	db_printf("Process %d (%s) at %p:\n", p->p_pid, p->p_comm, p);
 	db_printf(" state: ");
 	switch (p->p_state) {
 	case PRS_NEW:
 		db_printf("NEW\n");
 		break;
 	case PRS_NORMAL:
 		db_printf("NORMAL\n");
 		break;
 	case PRS_ZOMBIE:
 		db_printf("ZOMBIE\n");
 		break;
 	default:
 		db_printf("??? (%#x)\n", p->p_state);
 	}
 	if (p->p_ucred != NULL) {
 		db_printf(" uid: %d  gids: ", p->p_ucred->cr_uid);
 		for (i = 0; i < p->p_ucred->cr_ngroups; i++) {
 			db_printf("%d", p->p_ucred->cr_groups[i]);
 			if (i < (p->p_ucred->cr_ngroups - 1))
 				db_printf(", ");
 		}
 		db_printf("\n");
 	}
 	if (p->p_pptr != NULL)
 		db_printf(" parent: pid %d at %p\n", p->p_pptr->p_pid,
 		    p->p_pptr);
 	if (p->p_leader != NULL && p->p_leader != p)
 		db_printf(" leader: pid %d at %p\n", p->p_leader->p_pid,
 		    p->p_leader);
 	if (p->p_sysent != NULL)
 		db_printf(" ABI: %s\n", p->p_sysent->sv_name);
 	if (p->p_args != NULL) {
 		db_printf(" arguments: ");
 		dump_args(p);
 		db_printf("\n");
 	}
 	db_printf(" reaper: %p reapsubtree: %d\n",
 	    p->p_reaper, p->p_reapsubtree);
 	db_printf(" sigparent: %d\n", p->p_sigparent);
 	db_printf(" vmspace: %p\n", p->p_vmspace);
 	db_printf("   (map %p)\n",
 	    (p->p_vmspace != NULL) ? &p->p_vmspace->vm_map : 0);
 	db_printf("   (map.pmap %p)\n",
 	    (p->p_vmspace != NULL) ? &p->p_vmspace->vm_map.pmap : 0);
 	db_printf("   (pmap %p)\n",
 	    (p->p_vmspace != NULL) ? &p->p_vmspace->vm_pmap : 0);
 	db_printf(" threads: %d\n", p->p_numthreads);
 	FOREACH_THREAD_IN_PROC(p, td) {
 		dumpthread(p, td, 1);
 		if (db_pager_quit)
 			break;
 	}
 }
 
 void
 db_findstack_cmd(db_expr_t addr, bool have_addr, db_expr_t dummy3 __unused,
     char *dummy4 __unused)
 {
 	struct proc *p;
 	struct thread *td;
-	struct kstack_cache_entry *ks_ce;
 	vm_offset_t saddr;
 
 	if (have_addr)
 		saddr = addr;
 	else {
 		db_printf("Usage: findstack <address>\n");
 		return;
 	}
 
 	FOREACH_PROC_IN_SYSTEM(p) {
 		FOREACH_THREAD_IN_PROC(p, td) {
 			if (td->td_kstack <= saddr && saddr < td->td_kstack +
 			    PAGE_SIZE * td->td_kstack_pages) {
 				db_printf("Thread %p\n", td);
 				return;
 			}
-		}
-	}
-
-	for (ks_ce = kstack_cache; ks_ce != NULL;
-	     ks_ce = ks_ce->next_ks_entry) {
-		if ((vm_offset_t)ks_ce <= saddr && saddr < (vm_offset_t)ks_ce +
-		    PAGE_SIZE * kstack_pages) {
-			db_printf("Cached stack %p\n", ks_ce);
-			return;
 		}
 	}
 }
Index: head/sys/sys/_kstack_cache.h
===================================================================
--- head/sys/sys/_kstack_cache.h	(revision 350662)
+++ head/sys/sys/_kstack_cache.h	(nonexistent)
@@ -1,49 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright (c) 2009 Konstantin Belousov <kib@FreeBSD.org>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	$FreeBSD$
- */
-
-#ifndef _SYS__KSTACK_CACHE_H
-#define	_SYS__KSTACK_CACHE_H
-
-struct kstack_cache_entry {
-	struct vm_object *ksobj;
-	struct kstack_cache_entry *next_ks_entry;
-};
-
-extern struct kstack_cache_entry *kstack_cache;
-
-#ifndef KSTACK_MAX_PAGES
-#define KSTACK_MAX_PAGES 32
-#endif
-
-#endif
-
-

Property changes on: head/sys/sys/_kstack_cache.h
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: head/sys/vm/vm_glue.c
===================================================================
--- head/sys/vm/vm_glue.c	(revision 350662)
+++ head/sys/vm/vm_glue.c	(revision 350663)
@@ -1,596 +1,615 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_glue.c	8.6 (Berkeley) 1/5/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_vm.h"
 #include "opt_kstack_pages.h"
 #include "opt_kstack_max_pages.h"
 #include "opt_kstack_usage_prof.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/domainset.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sf_buf.h>
 #include <sys/shm.h>
 #include <sys/vmmeter.h>
 #include <sys/vmem.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
-#include <sys/_kstack_cache.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/unistd.h>
 
+#include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_domainset.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_object.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 
 #include <machine/cpu.h>
 
 /*
  * MPSAFE
  *
  * WARNING!  This code calls vm_map_check_protection() which only checks
  * the associated vm_map_entry range.  It does not determine whether the
  * contents of the memory is actually readable or writable.  In most cases
  * just checking the vm_map_entry is sufficient within the kernel's address
  * space.
  */
 int
 kernacc(void *addr, int len, int rw)
 {
 	boolean_t rv;
 	vm_offset_t saddr, eaddr;
 	vm_prot_t prot;
 
 	KASSERT((rw & ~VM_PROT_ALL) == 0,
 	    ("illegal ``rw'' argument to kernacc (%x)\n", rw));
 
 	if ((vm_offset_t)addr + len > vm_map_max(kernel_map) ||
 	    (vm_offset_t)addr + len < (vm_offset_t)addr)
 		return (FALSE);
 
 	prot = rw;
 	saddr = trunc_page((vm_offset_t)addr);
 	eaddr = round_page((vm_offset_t)addr + len);
 	vm_map_lock_read(kernel_map);
 	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
 	vm_map_unlock_read(kernel_map);
 	return (rv == TRUE);
 }
 
 /*
  * MPSAFE
  *
  * WARNING!  This code calls vm_map_check_protection() which only checks
  * the associated vm_map_entry range.  It does not determine whether the
  * contents of the memory is actually readable or writable.  vmapbuf(),
  * vm_fault_quick(), or copyin()/copout()/su*()/fu*() functions should be
  * used in conjunction with this call.
  */
 int
 useracc(void *addr, int len, int rw)
 {
 	boolean_t rv;
 	vm_prot_t prot;
 	vm_map_t map;
 
 	KASSERT((rw & ~VM_PROT_ALL) == 0,
 	    ("illegal ``rw'' argument to useracc (%x)\n", rw));
 	prot = rw;
 	map = &curproc->p_vmspace->vm_map;
 	if ((vm_offset_t)addr + len > vm_map_max(map) ||
 	    (vm_offset_t)addr + len < (vm_offset_t)addr) {
 		return (FALSE);
 	}
 	vm_map_lock_read(map);
 	rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
 	    round_page((vm_offset_t)addr + len), prot);
 	vm_map_unlock_read(map);
 	return (rv == TRUE);
 }
 
 int
 vslock(void *addr, size_t len)
 {
 	vm_offset_t end, last, start;
 	vm_size_t npages;
 	int error;
 
 	last = (vm_offset_t)addr + len;
 	start = trunc_page((vm_offset_t)addr);
 	end = round_page(last);
 	if (last < (vm_offset_t)addr || end < (vm_offset_t)addr)
 		return (EINVAL);
 	npages = atop(end - start);
 	if (npages > vm_page_max_user_wired)
 		return (ENOMEM);
 	error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end,
 	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
 	if (error == KERN_SUCCESS) {
 		curthread->td_vslock_sz += len;
 		return (0);
 	}
 
 	/*
 	 * Return EFAULT on error to match copy{in,out}() behaviour
 	 * rather than returning ENOMEM like mlock() would.
 	 */
 	return (EFAULT);
 }
 
 void
 vsunlock(void *addr, size_t len)
 {
 
 	/* Rely on the parameter sanity checks performed by vslock(). */
 	MPASS(curthread->td_vslock_sz >= len);
 	curthread->td_vslock_sz -= len;
 	(void)vm_map_unwire(&curproc->p_vmspace->vm_map,
 	    trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len),
 	    VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
 }
 
 /*
  * Pin the page contained within the given object at the given offset.  If the
  * page is not resident, allocate and load it using the given object's pager.
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
 vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m;
 	vm_pindex_t pindex;
 	int rv;
 
 	VM_OBJECT_WLOCK(object);
 	pindex = OFF_TO_IDX(offset);
 	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
 	    VM_ALLOC_WIRED);
 	if (m->valid != VM_PAGE_BITS_ALL) {
 		vm_page_xbusy(m);
 		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
 		if (rv != VM_PAGER_OK) {
 			vm_page_lock(m);
 			vm_page_unwire(m, PQ_NONE);
 			vm_page_free(m);
 			vm_page_unlock(m);
 			m = NULL;
 			goto out;
 		}
 		vm_page_xunbusy(m);
 	}
 out:
 	VM_OBJECT_WUNLOCK(object);
 	return (m);
 }
 
 /*
  * Return a CPU private mapping to the page at the given offset within the
  * given object.  The page is pinned before it is mapped.
  */
 struct sf_buf *
 vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m;
 
 	m = vm_imgact_hold_page(object, offset);
 	if (m == NULL)
 		return (NULL);
 	sched_pin();
 	return (sf_buf_alloc(m, SFB_CPUPRIVATE));
 }
 
 /*
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
 vm_imgact_unmap_page(struct sf_buf *sf)
 {
 	vm_page_t m;
 
 	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
 	vm_page_lock(m);
 	vm_page_unwire(m, PQ_ACTIVE);
 	vm_page_unlock(m);
 }
 
 void
 vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t sz)
 {
 
 	pmap_sync_icache(map->pmap, va, sz);
 }
 
-struct kstack_cache_entry *kstack_cache;
+static uma_zone_t kstack_cache;
 static int kstack_cache_size = 128;
-static int kstacks, kstack_domain_iter;
-static struct mtx kstack_cache_mtx;
-MTX_SYSINIT(kstack_cache, &kstack_cache_mtx, "kstkch", MTX_DEF);
+static int kstack_domain_iter;
 
-SYSCTL_INT(_vm, OID_AUTO, kstack_cache_size, CTLFLAG_RW, &kstack_cache_size, 0,
-    "");
-SYSCTL_INT(_vm, OID_AUTO, kstacks, CTLFLAG_RD, &kstacks, 0,
-    "");
+static int
+sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS)
+{
+	int error, newsize;
 
+	newsize = kstack_cache_size;
+	error = sysctl_handle_int(oidp, &newsize, 0, req);
+	if (error == 0 && req->newptr && newsize != kstack_cache_size)
+		kstack_cache_size =
+		    uma_zone_set_maxcache(kstack_cache, newsize);
+	return (error);
+}
+SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size, CTLTYPE_INT|CTLFLAG_RW,
+	&kstack_cache_size, 0, sysctl_kstack_cache_size, "I",
+	"Maximum number of cached kernel stacks");
+
 /*
  * Create the kernel stack (including pcb for i386) for a new thread.
  * This routine directly affects the fork perf for a process and
  * create performance for a thread.
  */
-int
-vm_thread_new(struct thread *td, int pages)
+static vm_offset_t
+vm_thread_stack_create(struct domainset *ds, vm_object_t *ksobjp, int pages)
 {
+	vm_page_t ma[KSTACK_MAX_PAGES];
 	vm_object_t ksobj;
 	vm_offset_t ks;
-	vm_page_t ma[KSTACK_MAX_PAGES];
-	struct kstack_cache_entry *ks_ce;
 	int i;
 
-	/* Bounds check */
-	if (pages <= 1)
-		pages = kstack_pages;
-	else if (pages > KSTACK_MAX_PAGES)
-		pages = KSTACK_MAX_PAGES;
-
-	if (pages == kstack_pages && kstack_cache != NULL) {
-		mtx_lock(&kstack_cache_mtx);
-		if (kstack_cache != NULL) {
-			ks_ce = kstack_cache;
-			kstack_cache = ks_ce->next_ks_entry;
-			mtx_unlock(&kstack_cache_mtx);
-
-			td->td_kstack_obj = ks_ce->ksobj;
-			td->td_kstack = (vm_offset_t)ks_ce;
-			td->td_kstack_pages = kstack_pages;
-			return (1);
-		}
-		mtx_unlock(&kstack_cache_mtx);
-	}
-
 	/*
 	 * Allocate an object for the kstack.
 	 */
 	ksobj = vm_object_allocate(OBJT_DEFAULT, pages);
 	
 	/*
 	 * Get a kernel virtual address for this thread's kstack.
 	 */
 #if defined(__mips__)
 	/*
 	 * We need to align the kstack's mapped address to fit within
 	 * a single TLB entry.
 	 */
 	if (vmem_xalloc(kernel_arena, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE,
 	    PAGE_SIZE * 2, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
 	    M_BESTFIT | M_NOWAIT, &ks)) {
 		ks = 0;
 	}
 #else
 	ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
 #endif
 	if (ks == 0) {
 		printf("vm_thread_new: kstack allocation failed\n");
 		vm_object_deallocate(ksobj);
 		return (0);
 	}
-
-	/*
-	 * Ensure that kstack objects can draw pages from any memory
-	 * domain.  Otherwise a local memory shortage can block a process
-	 * swap-in.
-	 */
 	if (vm_ndomains > 1) {
-		ksobj->domain.dr_policy = DOMAINSET_RR();
+		ksobj->domain.dr_policy = ds;
 		ksobj->domain.dr_iter =
 		    atomic_fetchadd_int(&kstack_domain_iter, 1);
 	}
 
-	atomic_add_int(&kstacks, 1);
 	if (KSTACK_GUARD_PAGES != 0) {
 		pmap_qremove(ks, KSTACK_GUARD_PAGES);
 		ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
 	}
-	td->td_kstack_obj = ksobj;
-	td->td_kstack = ks;
-	/*
-	 * Knowing the number of pages allocated is useful when you
-	 * want to deallocate them.
-	 */
-	td->td_kstack_pages = pages;
+
 	/* 
 	 * For the length of the stack, link in a real page of ram for each
 	 * page of stack.
 	 */
 	VM_OBJECT_WLOCK(ksobj);
 	(void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
 	    VM_ALLOC_WIRED, ma, pages);
 	for (i = 0; i < pages; i++)
 		ma[i]->valid = VM_PAGE_BITS_ALL;
 	VM_OBJECT_WUNLOCK(ksobj);
 	pmap_qenter(ks, ma, pages);
-	return (1);
+	*ksobjp = ksobj;
+
+	return (ks);
 }
 
 static void
 vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages)
 {
 	vm_page_t m;
 	int i;
 
-	atomic_add_int(&kstacks, -1);
 	pmap_qremove(ks, pages);
 	VM_OBJECT_WLOCK(ksobj);
 	for (i = 0; i < pages; i++) {
 		m = vm_page_lookup(ksobj, i);
 		if (m == NULL)
 			panic("vm_thread_dispose: kstack already missing?");
 		vm_page_lock(m);
 		vm_page_unwire_noq(m);
 		vm_page_free(m);
 		vm_page_unlock(m);
 	}
 	VM_OBJECT_WUNLOCK(ksobj);
 	vm_object_deallocate(ksobj);
 	kva_free(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
 	    (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
 }
 
 /*
+ * Allocate the kernel stack for a new thread.
+ */
+int
+vm_thread_new(struct thread *td, int pages)
+{
+	vm_object_t obj;
+	vm_offset_t stack;
+
+	/* Clamp the requested size; <= 1 selects the default. */
+	if (pages <= 1)
+		pages = kstack_pages;
+	else if (pages > KSTACK_MAX_PAGES)
+		pages = KSTACK_MAX_PAGES;
+
+	/* Default-sized stacks come from the cache once it exists. */
+	obj = NULL;
+	stack = 0;
+	if (kstack_cache != NULL && pages == kstack_pages)
+		stack = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT);
+	if (stack != 0) {
+		/* Recover the object from the page backing the stack base. */
+		obj = PHYS_TO_VM_PAGE(pmap_kextract(stack))->object;
+	} else {
+		/*
+		 * Kstack objects must be able to draw pages from any memory
+		 * domain, or a local memory shortage can block a swap-in.
+		 */
+		stack = vm_thread_stack_create(
+		    DOMAINSET_PREF(PCPU_GET(domain)), &obj, pages);
+		if (stack == 0)
+			return (0);
+	}
+	td->td_kstack_obj = obj;
+	td->td_kstack = stack;
+	td->td_kstack_pages = pages;
+	return (1);
+}
+
+/*
  * Dispose of a thread's kernel stack.
  */
 void
 vm_thread_dispose(struct thread *td)
 {
 	vm_object_t ksobj;
 	vm_offset_t ks;
-	struct kstack_cache_entry *ks_ce;
 	int pages;
 
 	pages = td->td_kstack_pages;
 	ksobj = td->td_kstack_obj;
 	ks = td->td_kstack;
 	td->td_kstack = 0;
 	td->td_kstack_pages = 0;
-	if (pages == kstack_pages && kstacks <= kstack_cache_size) {
-		ks_ce = (struct kstack_cache_entry *)ks;
-		ks_ce->ksobj = ksobj;
-		mtx_lock(&kstack_cache_mtx);
-		ks_ce->next_ks_entry = kstack_cache;
-		kstack_cache = ks_ce;
-		mtx_unlock(&kstack_cache_mtx);
-		return;
+	if (pages == kstack_pages)
+		uma_zfree(kstack_cache, (void *)ks);
+	else
+		vm_thread_stack_dispose(ksobj, ks, pages);
+}
+
+static int
+kstack_import(void *arg, void **store, int cnt, int domain, int flags)
+{
+	vm_object_t ksobj;
+	int i;
+
+	for (i = 0; i < cnt; i++) {
+		store[i] = (void *)vm_thread_stack_create(
+		    DOMAINSET_PREF(domain), &ksobj, kstack_pages);
+		if (store[i] == NULL)
+			break;
 	}
-	vm_thread_stack_dispose(ksobj, ks, pages);
+	return (i);
 }
 
 static void
-vm_thread_stack_lowmem(void *nulll)
+kstack_release(void *arg, void **store, int cnt)
 {
-	struct kstack_cache_entry *ks_ce, *ks_ce1;
+	vm_offset_t ks;
+	int i;
 
-	mtx_lock(&kstack_cache_mtx);
-	ks_ce = kstack_cache;
-	kstack_cache = NULL;
-	mtx_unlock(&kstack_cache_mtx);
-
-	while (ks_ce != NULL) {
-		ks_ce1 = ks_ce;
-		ks_ce = ks_ce->next_ks_entry;
-
-		vm_thread_stack_dispose(ks_ce1->ksobj, (vm_offset_t)ks_ce1,
-		    kstack_pages);
+	for (i = 0; i < cnt; i++) {
+		ks = (vm_offset_t)store[i];
+		vm_thread_stack_dispose(
+		    PHYS_TO_VM_PAGE(pmap_kextract(ks))->object,
+		    ks, kstack_pages);
 	}
 }
 
 static void
-kstack_cache_init(void *nulll)
+kstack_cache_init(void *null)
 {
-
-	EVENTHANDLER_REGISTER(vm_lowmem, vm_thread_stack_lowmem, NULL,
-	    EVENTHANDLER_PRI_ANY);
+	kstack_cache = uma_zcache_create("kstack_cache",
+	    kstack_pages * PAGE_SIZE, NULL, NULL, NULL, NULL,
+	    kstack_import, kstack_release, NULL,
+	    UMA_ZONE_NUMA|UMA_ZONE_MINBUCKET);
+	uma_zone_set_maxcache(kstack_cache, kstack_cache_size);
 }
 
 SYSINIT(vm_kstacks, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, kstack_cache_init, NULL);
 
 #ifdef KSTACK_USAGE_PROF
 /*
  * Track maximum stack used by a thread in kernel.
  */
 static int max_kstack_used;
 
 SYSCTL_INT(_debug, OID_AUTO, max_kstack_used, CTLFLAG_RD,
     &max_kstack_used, 0,
     "Maxiumum stack depth used by a thread in kernel");
 
 void
 intr_prof_stack_use(struct thread *td, struct trapframe *frame)
 {
 	vm_offset_t stack_top;
 	vm_offset_t current;
 	int used, prev_used;
 
 	/*
 	 * Testing for interrupted kernel mode isn't strictly
 	 * needed. It optimizes the execution, since interrupts from
 	 * usermode will have only the trap frame on the stack.
 	 */
 	if (TRAPF_USERMODE(frame))
 		return;
 
 	stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
 	current = (vm_offset_t)(uintptr_t)&stack_top;
 
 	/*
 	 * Try to detect if interrupt is using kernel thread stack.
 	 * Hardware could use a dedicated stack for interrupt handling.
 	 */
 	if (stack_top <= current || current < td->td_kstack)
 		return;
 
 	used = stack_top - current;
 	for (;;) {
 		prev_used = max_kstack_used;
 		if (prev_used >= used)
 			break;
 		if (atomic_cmpset_int(&max_kstack_used, prev_used, used))
 			break;
 	}
 }
 #endif /* KSTACK_USAGE_PROF */
 
 /*
  * Implement fork's actions on an address space.
  * Here we arrange for the address space to be copied or referenced,
  * allocate a user struct (pcb and kernel stack), then call the
  * machine-dependent layer to fill those in and make the new process
  * ready to run.  The new process is set up so that it returns directly
  * to user mode to avoid stack copying and relocation problems.
  */
 int
 vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
     struct vmspace *vm2, int flags)
 {
 	struct proc *p1 = td->td_proc;
 	struct domainset *dset;
 	int error;
 
 	if ((flags & RFPROC) == 0) {
 		/*
 		 * Divorce the memory, if it is shared, essentially
 		 * this changes shared memory amongst threads, into
 		 * COW locally.
 		 */
 		if ((flags & RFMEM) == 0) {
 			if (p1->p_vmspace->vm_refcnt > 1) {
 				error = vmspace_unshare(p1);
 				if (error)
 					return (error);
 			}
 		}
 		cpu_fork(td, p2, td2, flags);
 		return (0);
 	}
 
 	if (flags & RFMEM) {
 		p2->p_vmspace = p1->p_vmspace;
 		atomic_add_int(&p1->p_vmspace->vm_refcnt, 1);
 	}
 	dset = td2->td_domain.dr_policy;
 	while (vm_page_count_severe_set(&dset->ds_mask)) {
 		vm_wait_doms(&dset->ds_mask);
 	}
 
 	if ((flags & RFMEM) == 0) {
 		p2->p_vmspace = vm2;
 		if (p1->p_vmspace->vm_shm)
 			shmfork(p1, p2);
 	}
 
 	/*
 	 * cpu_fork will copy and update the pcb, set up the kernel stack,
 	 * and make the child ready to run.
 	 */
 	cpu_fork(td, p2, td2, flags);
 	return (0);
 }
 
 /*
  * Called after process has been wait(2)'ed upon and is being reaped.
  * The idea is to reclaim resources that we could not reclaim while
  * the process was still executing.
  */
 void
 vm_waitproc(p)
 	struct proc *p;
 {
 
 	vmspace_exitfree(p);		/* and clean-out the vmspace */
 }
 
 void
 kick_proc0(void)
 {
 
 	wakeup(&proc0);
 }
Index: head/sys/vm/vm_param.h
===================================================================
--- head/sys/vm/vm_param.h	(revision 350662)
+++ head/sys/vm/vm_param.h	(revision 350663)
@@ -1,137 +1,141 @@
 /*-
  * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_param.h	8.1 (Berkeley) 6/11/93
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Machine independent virtual memory parameters.
  */
 
 #ifndef	_VM_PARAM_
 #define	_VM_PARAM_
 
 #include <machine/vmparam.h>
 
 /*
  * CTL_VM identifiers
  */
 #define	VM_TOTAL		1	/* struct vmtotal */
 #define	VM_METER                VM_TOTAL/* deprecated, use VM_TOTAL */
 #define	VM_LOADAVG	 	2	/* struct loadavg */
 #define VM_V_FREE_MIN		3	/* vm_cnt.v_free_min */
 #define VM_V_FREE_TARGET	4	/* vm_cnt.v_free_target */
 #define VM_V_FREE_RESERVED	5	/* vm_cnt.v_free_reserved */
 #define VM_V_INACTIVE_TARGET	6	/* vm_cnt.v_inactive_target */
 #define	VM_OBSOLETE_7		7	/* unused, formerly v_cache_min */
 #define	VM_OBSOLETE_8		8	/* unused, formerly v_cache_max */
 #define VM_V_PAGEOUT_FREE_MIN	9	/* vm_cnt.v_pageout_free_min */
 #define	VM_OBSOLETE_10		10	/* pageout algorithm */
 #define VM_SWAPPING_ENABLED	11	/* swapping enabled */
 #define VM_OVERCOMMIT		12	/* vm.overcommit */
 #define	VM_MAXID		13	/* number of valid vm ids */
 
 /*
  * Structure for swap device statistics
  */
 #define XSWDEV_VERSION	2
 struct xswdev {
 	u_int	xsw_version;
 	dev_t	xsw_dev;
 	int	xsw_flags;
 	int	xsw_nblks;
 	int     xsw_used;
 };
 
 /*
  *	Return values from the VM routines.
  */
 #define	KERN_SUCCESS		0
 #define	KERN_INVALID_ADDRESS	1
 #define	KERN_PROTECTION_FAILURE	2
 #define	KERN_NO_SPACE		3
 #define	KERN_INVALID_ARGUMENT	4
 #define	KERN_FAILURE		5
 #define	KERN_RESOURCE_SHORTAGE	6
 #define	KERN_NOT_RECEIVER	7
 #define	KERN_NO_ACCESS		8
 
 #ifndef PA_LOCK_COUNT
 #ifdef SMP
 #define	PA_LOCK_COUNT	32
 #else
 #define PA_LOCK_COUNT	1
 #endif	/* !SMP */
 #endif	/* !PA_LOCK_COUNT */
 
+#ifndef KSTACK_MAX_PAGES
+#define KSTACK_MAX_PAGES 32
+#endif
+
 #ifndef ASSEMBLER
 #ifdef _KERNEL
 #define num_pages(x) \
 	((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
 extern	unsigned long maxtsiz;
 extern	unsigned long dfldsiz;
 extern	unsigned long maxdsiz;
 extern	unsigned long dflssiz;
 extern	unsigned long maxssiz;
 extern	unsigned long sgrowsiz;
 #endif				/* _KERNEL */
 #endif				/* ASSEMBLER */
 #endif				/* _VM_PARAM_ */
Index: head/sys/vm/vm_swapout.c
===================================================================
--- head/sys/vm/vm_swapout.c	(revision 350662)
+++ head/sys/vm/vm_swapout.c	(revision 350663)
@@ -1,963 +1,962 @@
 /*-
  * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU)
  *
  * Copyright (c) 1991 Regents of the University of California.
  * All rights reserved.
  * Copyright (c) 1994 John S. Dyson
  * All rights reserved.
  * Copyright (c) 1994 David Greenman
  * All rights reserved.
  * Copyright (c) 2005 Yahoo! Technologies Norway AS
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kstack_pages.h"
 #include "opt_kstack_max_pages.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/kernel.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
-#include <sys/_kstack_cache.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/mount.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/swap_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/uma.h>
 
 /* the kernel process "vm_daemon" */
 static void vm_daemon(void);
 static struct proc *vmproc;
 
 static struct kproc_desc vm_kp = {
 	"vmdaemon",
 	vm_daemon,
 	&vmproc
 };
 SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
 
 static int vm_swap_enabled = 1;
 static int vm_swap_idle_enabled = 0;
 
 SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW,
     &vm_swap_enabled, 0,
     "Enable entire process swapout");
 SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW,
     &vm_swap_idle_enabled, 0,
     "Allow swapout on idle criteria");
 
 /*
  * Swap_idle_threshold1 is the guaranteed swapped in time for a process
  */
 static int swap_idle_threshold1 = 2;
 SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW,
     &swap_idle_threshold1, 0,
     "Guaranteed swapped in time for a process");
 
 /*
  * Swap_idle_threshold2 is the time that a process can be idle before
  * it will be swapped out, if idle swapping is enabled.
  */
 static int swap_idle_threshold2 = 10;
 SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW,
     &swap_idle_threshold2, 0,
     "Time before a process will be swapped out");
 
 static int vm_pageout_req_swapout;	/* XXX */
 static int vm_daemon_needed;
 static struct mtx vm_daemon_mtx;
 /* Allow for use by vm_pageout before vm_daemon is initialized. */
 MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
 
 static int swapped_cnt;
 static int swap_inprogress;	/* Pending swap-ins done outside swapper. */
 static int last_swapin;
 
 static void swapclear(struct proc *);
 static int swapout(struct proc *);
 static void vm_swapout_map_deactivate_pages(vm_map_t, long);
 static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void swapout_procs(int action);
 static void vm_req_vmdaemon(int req);
 static void vm_thread_swapout(struct thread *td);
 
 /*
  *	vm_swapout_object_deactivate_pages
  *
  *	Deactivate enough pages to satisfy the inactive target
  *	requirements.
  *
  *	The object and map must be locked.
  */
 static void
 vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
     long desired)
 {
 	vm_object_t backing_object, object;
 	vm_page_t p;
 	int act_delta, remove_mode;
 
 	VM_OBJECT_ASSERT_LOCKED(first_object);
 	if ((first_object->flags & OBJ_FICTITIOUS) != 0)
 		return;
 	for (object = first_object;; object = backing_object) {
 		if (pmap_resident_count(pmap) <= desired)
 			goto unlock_return;
 		VM_OBJECT_ASSERT_LOCKED(object);
 		if ((object->flags & OBJ_UNMANAGED) != 0 ||
 		    object->paging_in_progress != 0)
 			goto unlock_return;
 
 		remove_mode = 0;
 		if (object->shadow_count > 1)
 			remove_mode = 1;
 		/*
 		 * Scan the object's entire memory queue.
 		 */
 		TAILQ_FOREACH(p, &object->memq, listq) {
 			if (pmap_resident_count(pmap) <= desired)
 				goto unlock_return;
 			if (should_yield())
 				goto unlock_return;
 			if (vm_page_busied(p))
 				continue;
 			VM_CNT_INC(v_pdpages);
 			vm_page_lock(p);
 			if (vm_page_wired(p) ||
 			    !pmap_page_exists_quick(pmap, p)) {
 				vm_page_unlock(p);
 				continue;
 			}
 			act_delta = pmap_ts_referenced(p);
 			if ((p->aflags & PGA_REFERENCED) != 0) {
 				if (act_delta == 0)
 					act_delta = 1;
 				vm_page_aflag_clear(p, PGA_REFERENCED);
 			}
 			if (!vm_page_active(p) && act_delta != 0) {
 				vm_page_activate(p);
 				p->act_count += act_delta;
 			} else if (vm_page_active(p)) {
 				if (act_delta == 0) {
 					p->act_count -= min(p->act_count,
 					    ACT_DECLINE);
 					if (!remove_mode && p->act_count == 0) {
 						pmap_remove_all(p);
 						vm_page_deactivate(p);
 					} else
 						vm_page_requeue(p);
 				} else {
 					vm_page_activate(p);
 					if (p->act_count < ACT_MAX -
 					    ACT_ADVANCE)
 						p->act_count += ACT_ADVANCE;
 					vm_page_requeue(p);
 				}
 			} else if (vm_page_inactive(p))
 				pmap_remove_all(p);
 			vm_page_unlock(p);
 		}
 		if ((backing_object = object->backing_object) == NULL)
 			goto unlock_return;
 		VM_OBJECT_RLOCK(backing_object);
 		if (object != first_object)
 			VM_OBJECT_RUNLOCK(object);
 	}
 unlock_return:
 	if (object != first_object)
 		VM_OBJECT_RUNLOCK(object);
 }
 
 /*
  * deactivate some number of pages in a map, try to do it fairly, but
  * that is really hard to do.
  */
 static void
 vm_swapout_map_deactivate_pages(vm_map_t map, long desired)
 {
 	vm_map_entry_t tmpe;
 	vm_object_t obj, bigobj;
 	int nothingwired;
 
 	if (!vm_map_trylock_read(map))
 		return;
 
 	bigobj = NULL;
 	nothingwired = TRUE;
 
 	/*
 	 * first, search out the biggest object, and try to free pages from
 	 * that.
 	 */
 	tmpe = map->header.next;
 	while (tmpe != &map->header) {
 		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 			obj = tmpe->object.vm_object;
 			if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) {
 				if (obj->shadow_count <= 1 &&
 				    (bigobj == NULL ||
 				     bigobj->resident_page_count <
 				     obj->resident_page_count)) {
 					if (bigobj != NULL)
 						VM_OBJECT_RUNLOCK(bigobj);
 					bigobj = obj;
 				} else
 					VM_OBJECT_RUNLOCK(obj);
 			}
 		}
 		if (tmpe->wired_count > 0)
 			nothingwired = FALSE;
 		tmpe = tmpe->next;
 	}
 
 	if (bigobj != NULL) {
 		vm_swapout_object_deactivate_pages(map->pmap, bigobj, desired);
 		VM_OBJECT_RUNLOCK(bigobj);
 	}
 	/*
 	 * Next, hunt around for other pages to deactivate.  We actually
 	 * do this search sort of wrong -- .text first is not the best idea.
 	 */
 	tmpe = map->header.next;
 	while (tmpe != &map->header) {
 		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
 			break;
 		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 			obj = tmpe->object.vm_object;
 			if (obj != NULL) {
 				VM_OBJECT_RLOCK(obj);
 				vm_swapout_object_deactivate_pages(map->pmap,
 				    obj, desired);
 				VM_OBJECT_RUNLOCK(obj);
 			}
 		}
 		tmpe = tmpe->next;
 	}
 
 	/*
 	 * Remove all mappings if a process is swapped out, this will free page
 	 * table pages.
 	 */
 	if (desired == 0 && nothingwired) {
 		pmap_remove(vm_map_pmap(map), vm_map_min(map),
 		    vm_map_max(map));
 	}
 
 	vm_map_unlock_read(map);
 }
 
 /*
  * Swap out requests
  */
 #define VM_SWAP_NORMAL 1
 #define VM_SWAP_IDLE 2
 
 void
 vm_swapout_run(void)
 {
 
 	if (vm_swap_enabled)
 		vm_req_vmdaemon(VM_SWAP_NORMAL);
 }
 
 /*
  * Idle process swapout -- run once per second when pagedaemons are
  * reclaiming pages.
  */
 void
 vm_swapout_run_idle(void)
 {
 	static long lsec;
 
 	if (!vm_swap_idle_enabled || time_second == lsec)
 		return;
 	vm_req_vmdaemon(VM_SWAP_IDLE);
 	lsec = time_second;
 }
 
 static void
 vm_req_vmdaemon(int req)
 {
 	static int lastrun = 0;
 
 	mtx_lock(&vm_daemon_mtx);
 	vm_pageout_req_swapout |= req;
 	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
 		wakeup(&vm_daemon_needed);
 		lastrun = ticks;
 	}
 	mtx_unlock(&vm_daemon_mtx);
 }
 
 static void
 vm_daemon(void)
 {
 	struct rlimit rsslim;
 	struct proc *p;
 	struct thread *td;
 	struct vmspace *vm;
 	int breakout, swapout_flags, tryagain, attempts;
 #ifdef RACCT
 	uint64_t rsize, ravailable;
 #endif
 
 	while (TRUE) {
 		mtx_lock(&vm_daemon_mtx);
 		msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep",
 #ifdef RACCT
 		    racct_enable ? hz : 0
 #else
 		    0
 #endif
 		);
 		swapout_flags = vm_pageout_req_swapout;
 		vm_pageout_req_swapout = 0;
 		mtx_unlock(&vm_daemon_mtx);
 		if (swapout_flags != 0) {
 			/*
 			 * Drain the per-CPU page queue batches as a deadlock
 			 * avoidance measure.
 			 */
 			if ((swapout_flags & VM_SWAP_NORMAL) != 0)
 				vm_page_drain_pqbatch();
 			swapout_procs(swapout_flags);
 		}
 
 		/*
 		 * scan the processes for exceeding their rlimits or if
 		 * process is swapped out -- deactivate pages
 		 */
 		tryagain = 0;
 		attempts = 0;
 again:
 		attempts++;
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			vm_pindex_t limit, size;
 
 			/*
 			 * if this is a system process or if we have already
 			 * looked at this process, skip it.
 			 */
 			PROC_LOCK(p);
 			if (p->p_state != PRS_NORMAL ||
 			    p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			/*
 			 * if the process is in a non-running type state,
 			 * don't touch it.
 			 */
 			breakout = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
 				thread_lock(td);
 				if (!TD_ON_RUNQ(td) &&
 				    !TD_IS_RUNNING(td) &&
 				    !TD_IS_SLEEPING(td) &&
 				    !TD_IS_SUSPENDED(td)) {
 					thread_unlock(td);
 					breakout = 1;
 					break;
 				}
 				thread_unlock(td);
 			}
 			if (breakout) {
 				PROC_UNLOCK(p);
 				continue;
 			}
 			/*
 			 * get a limit
 			 */
 			lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
 			limit = OFF_TO_IDX(
 			    qmin(rsslim.rlim_cur, rsslim.rlim_max));
 
 			/*
 			 * let processes that are swapped out really be
 			 * swapped out set the limit to nothing (will force a
 			 * swap-out.)
 			 */
 			if ((p->p_flag & P_INMEM) == 0)
 				limit = 0;	/* XXX */
 			vm = vmspace_acquire_ref(p);
 			_PHOLD_LITE(p);
 			PROC_UNLOCK(p);
 			if (vm == NULL) {
 				PRELE(p);
 				continue;
 			}
 			sx_sunlock(&allproc_lock);
 
 			size = vmspace_resident_count(vm);
 			if (size >= limit) {
 				vm_swapout_map_deactivate_pages(
 				    &vm->vm_map, limit);
 				size = vmspace_resident_count(vm);
 			}
 #ifdef RACCT
 			if (racct_enable) {
 				rsize = IDX_TO_OFF(size);
 				PROC_LOCK(p);
 				if (p->p_state == PRS_NORMAL)
 					racct_set(p, RACCT_RSS, rsize);
 				ravailable = racct_get_available(p, RACCT_RSS);
 				PROC_UNLOCK(p);
 				if (rsize > ravailable) {
 					/*
 					 * Don't be overly aggressive; this
 					 * might be an innocent process,
 					 * and the limit could've been exceeded
 					 * by some memory hog.  Don't try
 					 * to deactivate more than 1/4th
 					 * of process' resident set size.
 					 */
 					if (attempts <= 8) {
 						if (ravailable < rsize -
 						    (rsize / 4)) {
 							ravailable = rsize -
 							    (rsize / 4);
 						}
 					}
 					vm_swapout_map_deactivate_pages(
 					    &vm->vm_map,
 					    OFF_TO_IDX(ravailable));
 					/* Update RSS usage after paging out. */
 					size = vmspace_resident_count(vm);
 					rsize = IDX_TO_OFF(size);
 					PROC_LOCK(p);
 					if (p->p_state == PRS_NORMAL)
 						racct_set(p, RACCT_RSS, rsize);
 					PROC_UNLOCK(p);
 					if (rsize > ravailable)
 						tryagain = 1;
 				}
 			}
 #endif
 			vmspace_free(vm);
 			sx_slock(&allproc_lock);
 			PRELE(p);
 		}
 		sx_sunlock(&allproc_lock);
 		if (tryagain != 0 && attempts <= 10) {
 			maybe_yield();
 			goto again;
 		}
 	}
 }
 
 /*
  * Allow a thread's kernel stack to be paged out.
  */
 static void
 vm_thread_swapout(struct thread *td)
 {
 	vm_object_t ksobj;
 	vm_page_t m;
 	int i, pages;
 
 	cpu_thread_swapout(td);
 	pages = td->td_kstack_pages;
 	ksobj = td->td_kstack_obj;
 	pmap_qremove(td->td_kstack, pages);
 	VM_OBJECT_WLOCK(ksobj);
 	for (i = 0; i < pages; i++) {
 		m = vm_page_lookup(ksobj, i);
 		if (m == NULL)
 			panic("vm_thread_swapout: kstack already missing?");
 		vm_page_dirty(m);
 		vm_page_lock(m);
 		vm_page_unwire(m, PQ_LAUNDRY);
 		vm_page_unlock(m);
 	}
 	VM_OBJECT_WUNLOCK(ksobj);
 }
 
 /*
  * Bring the kernel stack for a specified thread back in.
  */
 static void
 vm_thread_swapin(struct thread *td, int oom_alloc)
 {
 	vm_object_t ksobj;
 	vm_page_t ma[KSTACK_MAX_PAGES];
 	int a, count, i, j, pages, rv;
 
 	pages = td->td_kstack_pages;
 	ksobj = td->td_kstack_obj;
 	VM_OBJECT_WLOCK(ksobj);
 	(void)vm_page_grab_pages(ksobj, 0, oom_alloc | VM_ALLOC_WIRED, ma,
 	    pages);
 	for (i = 0; i < pages;) {
 		vm_page_assert_xbusied(ma[i]);
 		if (ma[i]->valid == VM_PAGE_BITS_ALL) {
 			vm_page_xunbusy(ma[i]);
 			i++;
 			continue;
 		}
 		vm_object_pip_add(ksobj, 1);
 		for (j = i + 1; j < pages; j++)
 			if (ma[j]->valid == VM_PAGE_BITS_ALL)
 				break;
 		rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a);
 		KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i]));
 		count = min(a + 1, j - i);
 		rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL);
 		KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d",
 		    __func__, td->td_proc->p_pid));
 		vm_object_pip_wakeup(ksobj);
 		for (j = i; j < i + count; j++)
 			vm_page_xunbusy(ma[j]);
 		i += count;
 	}
 	VM_OBJECT_WUNLOCK(ksobj);
 	pmap_qenter(td->td_kstack, ma, pages);
 	cpu_thread_swapin(td);
 }
 
 void
 faultin(struct proc *p)
 {
 	struct thread *td;
 	int oom_alloc;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * If another process is swapping in this process,
 	 * just wait until it finishes.
 	 */
 	if (p->p_flag & P_SWAPPINGIN) {
 		while (p->p_flag & P_SWAPPINGIN)
 			msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0);
 		return;
 	}
 
 	if ((p->p_flag & P_INMEM) == 0) {
 		oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM :
 		    VM_ALLOC_NORMAL;
 
 		/*
 		 * Don't let another thread swap process p out while we are
 		 * busy swapping it in.
 		 */
 		++p->p_lock;
 		p->p_flag |= P_SWAPPINGIN;
 		PROC_UNLOCK(p);
 		sx_xlock(&allproc_lock);
 		MPASS(swapped_cnt > 0);
 		swapped_cnt--;
 		if (curthread != &thread0)
 			swap_inprogress++;
 		sx_xunlock(&allproc_lock);
 
 		/*
 		 * We hold no lock here because the list of threads
 		 * can not change while all threads in the process are
 		 * swapped out.
 		 */
 		FOREACH_THREAD_IN_PROC(p, td)
 			vm_thread_swapin(td, oom_alloc);
 
 		if (curthread != &thread0) {
 			sx_xlock(&allproc_lock);
 			MPASS(swap_inprogress > 0);
 			swap_inprogress--;
 			last_swapin = ticks;
 			sx_xunlock(&allproc_lock);
 		}
 		PROC_LOCK(p);
 		swapclear(p);
 		p->p_swtick = ticks;
 
 		/* Allow other threads to swap p out now. */
 		wakeup(&p->p_flag);
 		--p->p_lock;
 	}
 }
 
 /*
  * This swapin algorithm attempts to swap-in processes only if there
  * is enough space for them.  Of course, if a process waits for a long
  * time, it will be swapped in anyway.
  */
 
 static struct proc *
 swapper_selector(bool wkilled_only)
 {
 	struct proc *p, *res;
 	struct thread *td;
 	int ppri, pri, slptime, swtime;
 
 	sx_assert(&allproc_lock, SA_SLOCKED);
 	if (swapped_cnt == 0)
 		return (NULL);
 	res = NULL;
 	ppri = INT_MIN;
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
 		if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT |
 		    P_SWAPPINGIN | P_INMEM)) != 0) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) {
 			/*
 			 * A swapped-out process might have mapped a
 			 * large portion of the system's pages as
 			 * anonymous memory.  There is no other way to
 			 * release the memory other than to kill the
 			 * process, for which we need to swap it in.
 			 */
 			return (p);
 		}
 		if (wkilled_only) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 		swtime = (ticks - p->p_swtick) / hz;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			/*
 			 * An otherwise runnable thread of a process
 			 * swapped out has only the TDI_SWAPPED bit set.
 			 */
 			thread_lock(td);
 			if (td->td_inhibitors == TDI_SWAPPED) {
 				slptime = (ticks - td->td_slptick) / hz;
 				pri = swtime + slptime;
 				if ((td->td_flags & TDF_SWAPINREQ) == 0)
 					pri -= p->p_nice * 8;
 				/*
 				 * if this thread is higher priority
 				 * and there is enough space, then select
 				 * this process instead of the previous
 				 * selection.
 				 */
 				if (pri > ppri) {
 					res = p;
 					ppri = pri;
 				}
 			}
 			thread_unlock(td);
 		}
 		PROC_UNLOCK(p);
 	}
 
 	if (res != NULL)
 		PROC_LOCK(res);
 	return (res);
 }
 
 #define	SWAPIN_INTERVAL	(MAXSLP * hz / 2)
 
 /*
  * Limit swapper to swap in one non-WKILLED process in MAXSLP/2
  * interval, assuming that there is:
  * - at least one domain that is not suffering from a shortage of free memory;
  * - no parallel swap-ins;
  * - no other swap-ins in the current SWAPIN_INTERVAL.
  */
 static bool
 swapper_wkilled_only(void)
 {
 
 	return (vm_page_count_min_set(&all_domains) || swap_inprogress > 0 ||
 	    (u_int)(ticks - last_swapin) < SWAPIN_INTERVAL);
 }
 
 void
 swapper(void)
 {
 	struct proc *p;
 
 	for (;;) {
 		sx_slock(&allproc_lock);
 		p = swapper_selector(swapper_wkilled_only());
 		sx_sunlock(&allproc_lock);
 
 		if (p == NULL) {
 			tsleep(&proc0, PVM, "swapin", SWAPIN_INTERVAL);
 		} else {
 			PROC_LOCK_ASSERT(p, MA_OWNED);
 
 			/*
 			 * Another process may be bringing or may have
 			 * already brought this process in while we
 			 * traverse all threads.  Or, this process may
 			 * have exited or even being swapped out
 			 * again.
 			 */
 			if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM |
 			    P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) {
 				faultin(p);
 			}
 			PROC_UNLOCK(p);
 		}
 	}
 }
 
 /*
  * Walk the allproc list and swap out every process that qualifies.
  *
  * "action" must include VM_SWAP_NORMAL and/or VM_SWAP_IDLE (asserted).
  * A process is swapped out only if it passes the process-level filter
  * below and every one of its threads is not realtime, has a sleep time
  * of at least "swap_idle_threshold1" seconds and is safe to swap out.
  * Unless VM_SWAP_NORMAL is requested, every thread must additionally
  * have a sleep time of at least "swap_idle_threshold2" seconds.
  *
  * If at least one process was swapped out, sleepers on &proc0 (the
  * swapper) are woken up once the scan is complete.
  */
 static void
 swapout_procs(int action)
 {
 	struct proc *p;
 	struct thread *td;
 	int slptime;
 	bool didswap, doswap, swapped;
 
 	MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0);
 
 	didswap = false;
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		/*
 		 * Filter out not yet fully constructed processes.  Do
 		 * not swap out held processes.  Avoid processes which
 		 * are system, exiting, execing, traced, already swapped
 		 * out or are in the process of being swapped in or out.
 		 * Of the tested flags, only P_INMEM may be set.
 		 */
 		PROC_LOCK(p);
 		if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag &
 		    (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE |
 		    P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) !=
 		    P_INMEM) {
 			PROC_UNLOCK(p);
 			continue;
 		}
 
 		/*
 		 * Further consideration of this process for swap out
 		 * requires iterating over its threads.  We release
 		 * allproc_lock here so that process creation and
 		 * destruction are not blocked while we iterate.
 		 *
 		 * To later reacquire allproc_lock and resume
 		 * iteration over the allproc list, we will first have
 		 * to release the lock on the process.  We place a
 		 * hold on the process so that it remains in the
 		 * allproc list while it is unlocked.
 		 */
 		_PHOLD_LITE(p);
 		sx_sunlock(&allproc_lock);
 
 		/*
 		 * Do not swapout a realtime process.
 		 * Guarantee swap_idle_threshold1 time in memory.
 		 * If the system is under memory stress, or if we are
 		 * swapping idle processes >= swap_idle_threshold2,
 		 * then swap the process out.
 		 */
 		doswap = true;
 		FOREACH_THREAD_IN_PROC(p, td) {
 			thread_lock(td);
 			slptime = (ticks - td->td_slptick) / hz;
 			if (PRI_IS_REALTIME(td->td_pri_class) ||
 			    slptime < swap_idle_threshold1 ||
 			    !thread_safetoswapout(td) ||
 			    ((action & VM_SWAP_NORMAL) == 0 &&
 			    slptime < swap_idle_threshold2))
 				doswap = false;
 			thread_unlock(td);
 			if (!doswap)
 				break;
 		}
 
 		/*
 		 * Track the outcome for this process separately from the
 		 * scan-wide "didswap" flag.  "didswap" stays set once any
 		 * process has been swapped out, so using it for the
 		 * accounting below would bump swapped_cnt for every
 		 * process visited afterwards, swapped or not.
 		 */
 		swapped = doswap && swapout(p) == 0;
 		if (swapped)
 			didswap = true;
 
 		PROC_UNLOCK(p);
 		if (swapped) {
 			/*
 			 * swapped_cnt is updated under the exclusive lock;
 			 * downgrade afterwards to continue the iteration
 			 * with a shared lock.
 			 */
 			sx_xlock(&allproc_lock);
 			swapped_cnt++;
 			sx_downgrade(&allproc_lock);
 		} else
 			sx_slock(&allproc_lock);
 		/* Drop the hold only after allproc_lock is held again. */
 		PRELE(p);
 	}
 	sx_sunlock(&allproc_lock);
 
 	/*
 	 * If we swapped something out, and another process needed memory,
 	 * then wakeup the sched process.
 	 */
 	if (didswap)
 		wakeup(&proc0);
 }
 
 /*
  * Flag every thread of "p" as in memory and no longer swapped, then
  * clear the process's swap-in/swap-out transition flags and set
  * P_INMEM.  The caller must hold the process lock (asserted).
  */
 static void
 swapclear(struct proc *p)
 {
 	struct thread *thr;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	FOREACH_THREAD_IN_PROC(p, thr) {
 		thread_lock(thr);
 		/* Set TDF_INMEM and drop any pending swap-in request. */
 		thr->td_flags = (thr->td_flags | TDF_INMEM) & ~TDF_SWAPINREQ;
 		TD_CLR_SWAPPED(thr);
 		if (TD_CAN_RUN(thr) && setrunnable(thr)) {
 #ifdef INVARIANTS
 			/*
 			 * XXX: TDI_SWAPPED was just cleared and TDF_INMEM
 			 * was just set, so setrunnable() is not expected
 			 * to return non-zero here.
 			 */
 			panic("not waking up swapper");
 #endif
 		}
 		thread_unlock(thr);
 	}
 	p->p_flag = (p->p_flag & ~(P_SWAPPINGIN | P_SWAPPINGOUT)) | P_INMEM;
 }
 
 static int
 swapout(struct proc *p)
 {
 	struct thread *td;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
 	/*
 	 * The states of this process and its threads may have changed
 	 * by now.  Assuming that there is only one pageout daemon thread,
 	 * this process should still be in memory.
 	 */
 	KASSERT((p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) ==
 	    P_INMEM, ("swapout: lost a swapout race?"));
 
 	/*
 	 * Remember the resident count.
 	 */
 	p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);
 
 	/*
 	 * Check and mark all threads before we proceed.
 	 */
 	p->p_flag &= ~P_INMEM;
 	p->p_flag |= P_SWAPPINGOUT;
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
 		if (!thread_safetoswapout(td)) {
 			thread_unlock(td);
 			swapclear(p);
 			return (EBUSY);
 		}
 		td->td_flags &= ~TDF_INMEM;
 		TD_SET_SWAPPED(td);
 		thread_unlock(td);
 	}
 	td = FIRST_THREAD_IN_PROC(p);
 	++td->td_ru.ru_nswap;
 	PROC_UNLOCK(p);
 
 	/*
 	 * This list is stable because all threads are now prevented from
 	 * running.  The list is only modified in the context of a running
 	 * thread in this process.
 	 */
 	FOREACH_THREAD_IN_PROC(p, td)
 		vm_thread_swapout(td);
 
 	PROC_LOCK(p);
 	p->p_flag &= ~P_SWAPPINGOUT;
 	p->p_swtick = ticks;
 	return (0);
 }