diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 3e4f19d655e6..7386a0729835 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -1,919 +1,920 @@
/*-
* SPDX-License-Identifier: BSD-4-Clause
*
* Copyright (c) 1995 Terrence R. Lambert
* All rights reserved.
*
* Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_ddb.h"
#include "opt_kdb.h"
#include "opt_init_path.h"
#include "opt_verbose_sysinit.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/boottrace.h>
#include <sys/conf.h>
#include <sys/cpuset.h>
#include <sys/dtrace_bsd.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/imgact.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
#include <sys/queue.h>
#include <sys/queue_mergesort.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <machine/cpu.h>
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/copyright.h>
#include <ddb/ddb.h>
#include <ddb/db_sym.h>
void mi_startup(void); /* Should be elsewhere */
/* Components of the first process -- never freed. */
static struct session session0;
static struct pgrp pgrp0;
struct proc proc0;
struct thread0_storage thread0_st __aligned(32);
struct vmspace vmspace0;
struct proc *initproc;
int
linux_alloc_current_noop(struct thread *td __unused, int flags __unused)
{
return (0);
}
int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop;
#ifndef BOOTHOWTO
#define BOOTHOWTO 0
#endif
int boothowto = BOOTHOWTO; /* initialized so that it can be patched */
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0,
"Boot control flags, passed from loader");
#ifndef BOOTVERBOSE
#define BOOTVERBOSE 0
#endif
int bootverbose = BOOTVERBOSE;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
"Control the output of verbose kernel messages");
#ifdef VERBOSE_SYSINIT
/*
* We'll use the defined value of VERBOSE_SYSINIT from the kernel config to
* dictate the default VERBOSE_SYSINIT behavior. Significant values for this
* option and associated tunable are:
* - 0, 'compiled in but silent by default'
* - 1, 'compiled in but verbose by default' (default)
*/
int verbose_sysinit = VERBOSE_SYSINIT;
TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit);
#endif
#ifdef INVARIANTS
FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
#endif
/*
* This ensures that there is at least one entry so that the sysinit_set
* symbol is not undefined. A subsystem ID of SI_SUB_DUMMY is never
* executed.
*/
SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL);
/*
* The sysinit linker set compiled into the kernel. These are placed onto the
* sysinit list by mi_startup; sysinit_add can add (e.g., from klds) additional
* sysinits to the linked list but the linker set here does not change.
*/
SET_DECLARE(sysinit_set, struct sysinit);
/*
* The sysinit lists. Items are moved to sysinit_done_list when done.
*/
static STAILQ_HEAD(sysinitlist, sysinit) sysinit_list;
static struct sysinitlist sysinit_done_list =
STAILQ_HEAD_INITIALIZER(sysinit_done_list);
/*
* Compare two sysinits; return -1, 0, or 1 if a comes before, at the same time
* as, or after b.
*/
static int
sysinit_compar(struct sysinit *a, struct sysinit *b, void *thunk __unused)
{
if (a->subsystem < b->subsystem)
return (-1);
if (a->subsystem > b->subsystem)
return (1);
if (a->order < b->order)
return (-1);
if (a->order > b->order)
return (1);
return (0);
}
static void
sysinit_mklist(struct sysinitlist *list, struct sysinit **set,
struct sysinit **set_end)
{
struct sysinit **sipp;
TSENTER();
TSENTER2("listify");
STAILQ_INIT(list);
for (sipp = set; sipp < set_end; sipp++)
STAILQ_INSERT_TAIL(list, *sipp, next);
TSEXIT2("listify");
TSENTER2("mergesort");
STAILQ_MERGESORT(list, NULL, sysinit_compar, sysinit, next);
TSEXIT2("mergesort");
TSEXIT();
}
/*
* Merge a new sysinit set into the sysinit list.
*/
void
sysinit_add(struct sysinit **set, struct sysinit **set_end)
{
struct sysinitlist new_list;
TSENTER();
/* Construct a sorted list from the new sysinits. */
sysinit_mklist(&new_list, set, set_end);
/* Merge the new list into the existing one. */
TSENTER2("STAILQ_MERGE");
STAILQ_MERGE(&sysinit_list, &new_list, NULL, sysinit_compar, sysinit, next);
TSEXIT2("STAILQ_MERGE");
TSEXIT();
}
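/*
 * Aside (a minimal user-space sketch, not kernel source; the struct,
 * subsystem values, and names are hypothetical): this illustrates the
 * ordering that sysinit_compar() and the merge above produce. A module's
 * sysinits interleave with the boot-time list by (subsystem, order), and
 * ties keep the entries already on the list first.
 */
#include <stdio.h>

struct si {
	unsigned subsystem;
	unsigned order;
	const char *name;
};

static int
si_cmp(const struct si *a, const struct si *b)
{
	if (a->subsystem != b->subsystem)
		return (a->subsystem < b->subsystem ? -1 : 1);
	if (a->order != b->order)
		return (a->order < b->order ? -1 : 1);
	return (0);
}

int
main(void)
{
	struct si boot[] = {	/* already sorted, as after sysinit_mklist() */
		{ 0x0800000, 0, "copyright" }, { 0x2000000, 0, "proc0" },
	};
	struct si kld[] = {	/* hypothetical module sysinits, also sorted */
		{ 0x0800000, 1, "mod_banner" }, { 0x3000000, 0, "mod_init" },
	};
	size_t i = 0, j = 0;

	/* Plain two-way merge of the two sorted sequences. */
	while (i < 2 || j < 2) {
		if (j == 2 || (i < 2 && si_cmp(&boot[i], &kld[j]) <= 0))
			printf("%s\n", boot[i++].name);
		else
			printf("%s\n", kld[j++].name);
	}
	/* Prints: copyright, mod_banner, proc0, mod_init */
	return (0);
}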
#if defined (DDB) && defined(VERBOSE_SYSINIT)
static const char *
symbol_name(vm_offset_t va, db_strategy_t strategy)
{
const char *name;
c_db_sym_t sym;
db_expr_t offset;
if (va == 0)
return (NULL);
sym = db_search_symbol(va, strategy, &offset);
if (offset != 0)
return (NULL);
db_symbol_values(sym, &name, NULL);
return (name);
}
#endif
/*
* System startup; initialize the world, create process 0, mount root
* filesystem, and fork to create init and pagedaemon. Most of the
* hard work is done in the lower-level initialization routines including
* startup(), which does memory initialization and autoconfiguration.
*
* This allows simple addition of new kernel subsystems that require
* boot time initialization. It also allows substitution of subsystem
* (for instance, a scheduler, kernel profiler, or VM system) by object
* module. Finally, it allows for optional "kernel threads".
*/
void
mi_startup(void)
{
-
struct sysinit *sip;
int last;
#if defined(VERBOSE_SYSINIT)
int verbose;
#endif
TSENTER();
if (boothowto & RB_VERBOSE)
bootverbose++;
/* Construct and sort sysinit list. */
sysinit_mklist(&sysinit_list, SET_BEGIN(sysinit_set), SET_LIMIT(sysinit_set));
last = SI_SUB_COPYRIGHT;
#if defined(VERBOSE_SYSINIT)
verbose = 0;
#if !defined(DDB)
printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n");
#endif
#endif
/*
* Perform each system initialization task from the ordered list. Note
* that if sysinit_list is modified (e.g. by a KLD) we will nonetheless
* always perform the earliest-sorted sysinit at each step; using the
* STAILQ_FOREACH macro would result in items being skipped if inserted
* earlier than the "current item".
*/
while ((sip = STAILQ_FIRST(&sysinit_list)) != NULL) {
STAILQ_REMOVE_HEAD(&sysinit_list, next);
STAILQ_INSERT_TAIL(&sysinit_done_list, sip, next);
if (sip->subsystem == SI_SUB_DUMMY)
continue; /* skip dummy task(s)*/
if (sip->subsystem > last)
BOOTTRACE_INIT("sysinit 0x%7x", sip->subsystem);
#if defined(VERBOSE_SYSINIT)
if (sip->subsystem > last && verbose_sysinit != 0) {
verbose = 1;
printf("subsystem %x\n", last);
}
if (verbose) {
#if defined(DDB)
const char *func, *data;
func = symbol_name((vm_offset_t)sip->func,
DB_STGY_PROC);
data = symbol_name((vm_offset_t)sip->udata,
DB_STGY_ANY);
if (func != NULL && data != NULL)
printf(" %s(&%s)... ", func, data);
else if (func != NULL)
printf(" %s(%p)... ", func, sip->udata);
else
#endif
printf(" %p(%p)... ", sip->func,
sip->udata);
}
#endif
/* Call function */
(*(sip->func))(sip->udata);
#if defined(VERBOSE_SYSINIT)
if (verbose)
printf("done.\n");
#endif
/* Check off the one we've just done */
last = sip->subsystem;
}
TSEXIT(); /* Here so we don't overlap with start_init. */
BOOTTRACE("mi_startup done");
mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
mtx_unlock(&Giant);
/*
- * Now hand over this thread to swapper.
+ * We can't free our thread structure since it is statically allocated.
+ * Just sleep forever. This thread could be repurposed for something if
+ * the need arises.
*/
- swapper();
- /* NOTREACHED*/
+ for (;;)
+ tsleep(__builtin_frame_address(0), PNOLOCK, "parked", 0);
}
static void
print_caddr_t(void *data)
{
printf("%s", (char *)data);
}
static void
print_version(void *data __unused)
{
int len;
/* Strip a trailing newline from version. */
len = strlen(version);
while (len > 0 && version[len - 1] == '\n')
len--;
printf("%.*s %s\n", len, version, machine);
printf("%s\n", compiler_version);
}
SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t,
copyright);
SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t,
trademark);
SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL);
#ifdef WITNESS
static char wit_warn[] =
"WARNING: WITNESS option enabled, expect reduced performance.\n";
SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_FOURTH,
print_caddr_t, wit_warn);
SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_FOURTH,
print_caddr_t, wit_warn);
#endif
#ifdef DIAGNOSTIC
static char diag_warn[] =
"WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH,
print_caddr_t, diag_warn);
SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH,
print_caddr_t, diag_warn);
#endif
#if __SIZEOF_LONG__ == 4
static char ilp32_warn[] =
"WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 15.0.\n";
SYSINIT(ilp32warn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH,
print_caddr_t, ilp32_warn);
SYSINIT(ilp32warn2, SI_SUB_LAST, SI_ORDER_FIFTH,
print_caddr_t, ilp32_warn);
#endif
static int
null_fetch_syscall_args(struct thread *td __unused)
{
panic("null_fetch_syscall_args");
}
static void
null_set_syscall_retval(struct thread *td __unused, int error __unused)
{
panic("null_set_syscall_retval");
}
static void
null_set_fork_retval(struct thread *td __unused)
{
}
struct sysentvec null_sysvec = {
.sv_size = 0,
.sv_table = NULL,
.sv_fixup = NULL,
.sv_sendsig = NULL,
.sv_sigcode = NULL,
.sv_szsigcode = NULL,
.sv_name = "null",
.sv_coredump = NULL,
.sv_minsigstksz = 0,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
.sv_psstrings = PS_STRINGS,
.sv_psstringssz = sizeof(struct ps_strings),
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_strings = NULL,
.sv_setregs = NULL,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = 0,
.sv_set_syscall_retval = null_set_syscall_retval,
.sv_fetch_syscall_args = null_fetch_syscall_args,
.sv_syscallnames = NULL,
.sv_schedtail = NULL,
.sv_thread_detach = NULL,
.sv_trap = NULL,
.sv_set_fork_retval = null_set_fork_retval,
.sv_regset_begin = NULL,
.sv_regset_end = NULL,
};
/*
* The two following SYSINIT's are proc0 specific glue code. I am not
* convinced that they cannot safely be combined, but their order of
* operation has been kept the same as in the original init_main.c
* for right now.
*/
/* ARGSUSED*/
static void
proc0_init(void *dummy __unused)
{
struct proc *p;
struct thread *td;
struct ucred *newcred;
struct uidinfo tmpuinfo;
struct loginclass tmplc = {
.lc_name = "",
};
vm_paddr_t pageablemem;
int i;
GIANT_REQUIRED;
p = &proc0;
td = &thread0;
/*
* Initialize magic number and osrel.
*/
p->p_magic = P_MAGIC;
p->p_osrel = osreldate;
/*
* Initialize thread and process structures.
*/
procinit(); /* set up proc zone */
threadinit(); /* set up UMA zones */
/*
* Initialise scheduler resources.
* Add scheduler specific parts to proc, thread as needed.
*/
schedinit(); /* scheduler gets its house in order */
/*
* Create process 0 (the swapper).
*/
LIST_INSERT_HEAD(&allproc, p, p_list);
LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
sx_init(&pgrp0.pg_killsx, "killpg racer");
p->p_pgrp = &pgrp0;
LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
LIST_INIT(&pgrp0.pg_members);
LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
pgrp0.pg_session = &session0;
mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
refcount_init(&session0.s_count, 1);
session0.s_leader = p;
p->p_sysent = &null_sysvec;
p->p_flag = P_SYSTEM | P_INMEM | P_KPROC;
p->p_flag2 = 0;
p->p_state = PRS_NORMAL;
p->p_klist = knlist_alloc(&p->p_mtx);
STAILQ_INIT(&p->p_ktr);
p->p_nice = NZERO;
td->td_tid = THREAD0_TID;
tidhash_add(td);
TD_SET_STATE(td, TDS_RUNNING);
td->td_pri_class = PRI_TIMESHARE;
td->td_user_pri = PUSER;
td->td_base_user_pri = PUSER;
td->td_lend_user_pri = PRI_MAX;
td->td_priority = PVM;
td->td_base_pri = PVM;
td->td_oncpu = curcpu;
td->td_flags = TDF_INMEM;
td->td_pflags = TDP_KTHREAD;
td->td_cpuset = cpuset_thread0();
td->td_domain.dr_policy = td->td_cpuset->cs_domain;
prison0_init();
p->p_peers = 0;
p->p_leader = p;
p->p_reaper = p;
p->p_treeflag |= P_TREE_REAPER;
LIST_INIT(&p->p_reaplist);
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
strncpy(td->td_name, "swapper", sizeof (td->td_name));
callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
callout_init(&td->td_slpcallout, 1);
TAILQ_INIT(&p->p_kqtim_stop);
/* Create credentials. */
newcred = crget();
newcred->cr_ngroups = 1; /* group 0 */
/* A hack to prevent uifind from tripping over NULL pointers. */
curthread->td_ucred = newcred;
tmpuinfo.ui_uid = 1;
newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo;
newcred->cr_uidinfo = uifind(0);
newcred->cr_ruidinfo = uifind(0);
newcred->cr_loginclass = &tmplc;
newcred->cr_loginclass = loginclass_find("default");
/* End hack. creds get properly set later with thread_cow_get_proc */
curthread->td_ucred = NULL;
newcred->cr_prison = &prison0;
newcred->cr_users++; /* avoid assertion failure */
p->p_ucred = crcowget(newcred);
newcred->cr_users--;
crfree(newcred);
#ifdef AUDIT
audit_cred_kproc0(newcred);
#endif
#ifdef MAC
mac_cred_create_swapper(newcred);
#endif
/* Create sigacts. */
p->p_sigacts = sigacts_alloc();
/* Initialize signal state for process 0. */
siginit(&proc0);
/* Create the file descriptor table. */
p->p_pd = pdinit(NULL, false);
p->p_fd = fdinit();
p->p_fdtol = NULL;
/* Create the limits structures. */
p->p_limit = lim_alloc();
for (i = 0; i < RLIM_NLIMITS; i++)
p->p_limit->pl_rlimit[i].rlim_cur =
p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
/* Cast to avoid overflow on i386/PAE. */
pageablemem = ptoa((vm_paddr_t)vm_free_count());
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur =
p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem;
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3;
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
p->p_cpulimit = RLIM_INFINITY;
PROC_LOCK(p);
thread_cow_get_proc(td, p);
PROC_UNLOCK(p);
/* Initialize resource accounting structures. */
racct_create(&p->p_racct);
p->p_stats = pstats_alloc();
/* Allocate a prototype map so we have something to fork. */
p->p_vmspace = &vmspace0;
refcount_init(&vmspace0.vm_refcnt, 1);
pmap_pinit0(vmspace_pmap(&vmspace0));
/*
* proc0 is not expected to enter usermode, so there is no special
* handling for sv_minuser here, like is done for exec_new_vmspace().
*/
vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0),
p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser);
/*
* Call the init and ctor for the new thread and proc. We wait
* to do this until all other structures are fairly sane.
*/
EVENTHANDLER_DIRECT_INVOKE(process_init, p);
EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
#ifdef KDTRACE_HOOKS
kdtrace_proc_ctor(p);
kdtrace_thread_ctor(td);
#endif
EVENTHANDLER_DIRECT_INVOKE(process_ctor, p);
EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td);
/*
* Charge root for one process.
*/
(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
PROC_LOCK(p);
racct_add_force(p, RACCT_NPROC, 1);
PROC_UNLOCK(p);
}
SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
/* ARGSUSED*/
static void
proc0_post(void *dummy __unused)
{
struct proc *p;
struct rusage ru;
struct thread *td;
/*
* Now we can look at the time, having had a chance to verify the
* time from the filesystem. Pretend that proc0 started now.
*/
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW) {
PROC_UNLOCK(p);
continue;
}
microuptime(&p->p_stats->p_start);
PROC_STATLOCK(p);
rufetch(p, &ru); /* Clears thread stats */
p->p_rux.rux_runtime = 0;
p->p_rux.rux_uticks = 0;
p->p_rux.rux_sticks = 0;
p->p_rux.rux_iticks = 0;
PROC_STATUNLOCK(p);
FOREACH_THREAD_IN_PROC(p, td) {
td->td_runtime = 0;
}
PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
PCPU_SET(switchtime, cpu_ticks());
PCPU_SET(switchticks, ticks);
}
SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL);
/*
***************************************************************************
****
**** The following SYSINIT's and glue code should be moved to the
**** respective files on a per subsystem basis.
****
***************************************************************************
*/
/*
* List of paths to try when searching for "init".
*/
static char init_path[MAXPATHLEN] =
#ifdef INIT_PATH
__XSTRING(INIT_PATH);
#else
"/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init";
#endif
SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
"Path used to search the init process");
/*
* Shutdown timeout of init(8).
* Unused within kernel, but used to control init(8), hence do not remove.
*/
#ifndef INIT_SHUTDOWN_TIMEOUT
#define INIT_SHUTDOWN_TIMEOUT 120
#endif
static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). "
"Unused within kernel, but used to control init(8)");
/*
* Start the initial user process; try exec'ing each pathname in init_path.
* The program is invoked with one argument containing the boot flags.
*/
static void
start_init(void *dummy)
{
struct image_args args;
int error;
char *var, *path;
char *free_init_path, *tmp_init_path;
struct thread *td;
struct proc *p;
struct vmspace *oldvmspace;
TSENTER(); /* Here so we don't overlap with mi_startup. */
td = curthread;
p = td->td_proc;
vfs_mountroot();
/* Wipe GELI passphrase from the environment. */
kern_unsetenv("kern.geom.eli.passphrase");
/* For Multicons, report which console is primary to both */
if (boothowto & RB_MULTIPLE) {
if (boothowto & RB_SERIAL)
printf("Dual Console: Serial Primary, Video Secondary\n");
else
printf("Dual Console: Video Primary, Serial Secondary\n");
}
if ((var = kern_getenv("init_path")) != NULL) {
strlcpy(init_path, var, sizeof(init_path));
freeenv(var);
}
free_init_path = tmp_init_path = strdup(init_path, M_TEMP);
while ((path = strsep(&tmp_init_path, ":")) != NULL) {
if (bootverbose)
printf("start_init: trying %s\n", path);
memset(&args, 0, sizeof(args));
error = exec_alloc_args(&args);
if (error != 0)
panic("%s: Can't allocate space for init arguments %d",
__func__, error);
error = exec_args_add_fname(&args, path, UIO_SYSSPACE);
if (error != 0)
panic("%s: Can't add fname %d", __func__, error);
error = exec_args_add_arg(&args, path, UIO_SYSSPACE);
if (error != 0)
panic("%s: Can't add argv[0] %d", __func__, error);
if (boothowto & RB_SINGLE)
error = exec_args_add_arg(&args, "-s", UIO_SYSSPACE);
if (error != 0)
panic("%s: Can't add argv[0] %d", __func__, error);
/*
* Now try to exec the program. If can't for any reason
* other than it doesn't exist, complain.
*
* Otherwise, return via fork_trampoline() all the way
* to user mode as init!
*/
KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0,
("nested execve"));
memset(td->td_frame, 0, sizeof(*td->td_frame));
oldvmspace = p->p_vmspace;
error = kern_execve(td, &args, NULL, oldvmspace);
KASSERT(error != 0,
("kern_execve returned success, not EJUSTRETURN"));
if (error == EJUSTRETURN) {
exec_cleanup(td, oldvmspace);
free(free_init_path, M_TEMP);
TSEXIT();
return;
}
if (error != ENOENT)
printf("exec %s: error %d\n", path, error);
}
free(free_init_path, M_TEMP);
printf("init: not found in path %s\n", init_path);
panic("no init");
}
/*
* Like kproc_create(), but runs in its own address space. We do this
* early to reserve pid 1. Note the special case: do not make it
* runnable yet; init execution is started once userspace can be served.
*/
static void
create_init(const void *udata __unused)
{
struct fork_req fr;
struct ucred *newcred, *oldcred;
struct thread *td;
int error;
bzero(&fr, sizeof(fr));
fr.fr_flags = RFFDG | RFPROC | RFSTOPPED;
fr.fr_procp = &initproc;
error = fork1(&thread0, &fr);
if (error)
panic("cannot fork init: %d\n", error);
KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
/* divorce init's credentials from the kernel's */
newcred = crget();
sx_xlock(&proctree_lock);
PROC_LOCK(initproc);
initproc->p_flag |= P_SYSTEM | P_INMEM;
initproc->p_treeflag |= P_TREE_REAPER;
oldcred = initproc->p_ucred;
crcopy(newcred, oldcred);
#ifdef MAC
mac_cred_create_init(newcred);
#endif
#ifdef AUDIT
audit_cred_proc1(newcred);
#endif
proc_set_cred(initproc, newcred);
td = FIRST_THREAD_IN_PROC(initproc);
crcowfree(td);
td->td_realucred = crcowget(initproc->p_ucred);
td->td_ucred = td->td_realucred;
PROC_UNLOCK(initproc);
sx_xunlock(&proctree_lock);
crfree(oldcred);
cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc),
start_init, NULL);
}
SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
/*
* Make it runnable now.
*/
static void
kick_init(const void *udata __unused)
{
struct thread *td;
td = FIRST_THREAD_IN_PROC(initproc);
thread_lock(td);
TD_SET_CAN_RUN(td);
sched_add(td, SRQ_BORING);
}
SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
/*
* DDB(4).
*/
#ifdef DDB
static void
db_show_print_syinit(struct sysinit *sip, bool ddb)
{
const char *sname, *funcname;
c_db_sym_t sym;
db_expr_t offset;
#define xprint(...) \
if (ddb) \
db_printf(__VA_ARGS__); \
else \
printf(__VA_ARGS__)
if (sip == NULL) {
xprint("%s: no sysinit * given\n", __func__);
return;
}
sym = db_search_symbol((vm_offset_t)sip, DB_STGY_ANY, &offset);
db_symbol_values(sym, &sname, NULL);
sym = db_search_symbol((vm_offset_t)sip->func, DB_STGY_PROC, &offset);
db_symbol_values(sym, &funcname, NULL);
xprint("%s(%p)\n", (sname != NULL) ? sname : "", sip);
xprint(" %#08x %#08x\n", sip->subsystem, sip->order);
xprint(" %p(%s)(%p)\n",
sip->func, (funcname != NULL) ? funcname : "", sip->udata);
#undef xprint
}
DB_SHOW_COMMAND_FLAGS(sysinit, db_show_sysinit, DB_CMD_MEMSAFE)
{
struct sysinit *sip;
db_printf("SYSINIT vs Name(Ptr)\n");
db_printf(" Subsystem Order\n");
db_printf(" Function(Name)(Arg)\n");
STAILQ_FOREACH(sip, &sysinit_done_list, next) {
db_show_print_syinit(sip, true);
if (db_pager_quit)
return;
}
STAILQ_FOREACH(sip, &sysinit_list, next) {
db_show_print_syinit(sip, true);
if (db_pager_quit)
break;
}
}
#endif /* DDB */
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index b7d149a2fca2..d28c84dd1c95 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -1,184 +1,183 @@
/*-
* SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#ifndef VM_H
#define VM_H
#include <machine/vm.h>
typedef char vm_inherit_t; /* inheritance codes */
#define VM_INHERIT_SHARE ((vm_inherit_t) 0)
#define VM_INHERIT_COPY ((vm_inherit_t) 1)
#define VM_INHERIT_NONE ((vm_inherit_t) 2)
#define VM_INHERIT_ZERO ((vm_inherit_t) 3)
#define VM_INHERIT_DEFAULT VM_INHERIT_COPY
typedef u_char vm_prot_t; /* protection codes */
#define VM_PROT_NONE ((vm_prot_t) 0x00)
#define VM_PROT_READ ((vm_prot_t) 0x01)
#define VM_PROT_WRITE ((vm_prot_t) 0x02)
#define VM_PROT_EXECUTE ((vm_prot_t) 0x04)
#define VM_PROT_COPY ((vm_prot_t) 0x08) /* copy-on-read */
#define VM_PROT_PRIV_FLAG ((vm_prot_t) 0x10)
#define VM_PROT_FAULT_LOOKUP VM_PROT_PRIV_FLAG
#define VM_PROT_NO_PROMOTE VM_PROT_PRIV_FLAG
#define VM_PROT_QUICK_NOFAULT VM_PROT_PRIV_FLAG /* same to save bits */
#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
#define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE)
#define VM_PROT_DEFAULT VM_PROT_ALL
enum obj_type {
OBJT_RESERVED = 0, /* was OBJT_DEFAULT */
OBJT_SWAP,
OBJT_DEFAULT = OBJT_SWAP,
OBJT_VNODE,
OBJT_DEVICE,
OBJT_PHYS,
OBJT_DEAD,
OBJT_SG,
OBJT_MGTDEVICE,
OBJT_FIRST_DYN,
};
typedef u_char objtype_t;
union vm_map_object;
typedef union vm_map_object vm_map_object_t;
struct vm_map_entry;
typedef struct vm_map_entry *vm_map_entry_t;
struct vm_map;
typedef struct vm_map *vm_map_t;
struct vm_object;
typedef struct vm_object *vm_object_t;
#ifndef _KERNEL
/*
* This is defined in <sys/types.h> for the kernel so that non-vm kernel
* sources (mainly Mach-derived ones such as ddb) don't have to include
* vm stuff. Defining it there for applications might break things.
* Define it here for "applications" that include vm headers (e.g.,
* genassym).
*/
#ifndef HAVE_BOOLEAN
typedef int boolean_t;
#endif
/*
* The exact set of memory attributes is machine dependent. However,
* every machine is required to define VM_MEMATTR_DEFAULT and
* VM_MEMATTR_UNCACHEABLE.
*/
typedef char vm_memattr_t; /* memory attribute codes */
/*
* This is defined in <sys/types.h> for the kernel so that vnode_if.h
* doesn't have to include <vm/vm.h>.
*/
struct vm_page;
typedef struct vm_page *vm_page_t;
#endif /* _KERNEL */
struct vm_reserv;
typedef struct vm_reserv *vm_reserv_t;
/*
* Information passed from the machine-independent VM initialization code
* for use by machine-dependent code (mainly for MMU support)
*/
struct kva_md_info {
vm_offset_t buffer_sva;
vm_offset_t buffer_eva;
vm_offset_t clean_sva;
vm_offset_t clean_eva;
};
/* bits from overcommit */
#define SWAP_RESERVE_FORCE_ON (1 << 0)
#define SWAP_RESERVE_RLIMIT_ON (1 << 1)
#define SWAP_RESERVE_ALLOW_NONWIRED (1 << 2)
#ifdef NUMA
#define __numa_used
#else
#define __numa_used __unused
#endif
#ifdef _KERNEL
struct ucred;
void vm_ksubmap_init(struct kva_md_info *);
bool swap_reserve(vm_ooffset_t incr);
bool swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred);
void swap_reserve_force(vm_ooffset_t incr);
void swap_release(vm_ooffset_t decr);
void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred);
-void swapper(void);
extern struct kva_md_info kmi;
#define VA_IS_CLEANMAP(va) \
((va) >= kmi.clean_sva && (va) < kmi.clean_eva)
extern int old_mlock;
extern int vm_ndomains;
extern int vm_overcommit;
#endif /* _KERNEL */
#endif /* VM_H */
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 4f8121fa1064..63417687a1a5 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -1,842 +1,843 @@
/*-
* SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU)
*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include <sys/cdefs.h>
#include "opt_vm.h"
#include "opt_kstack_pages.h"
#include "opt_kstack_max_pages.h"
#include "opt_kstack_usage_prof.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/asan.h>
#include <sys/domainset.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/msan.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sf_buf.h>
#include <sys/shm.h>
#include <sys/smp.h>
#include <sys/vmmeter.h>
#include <sys/vmem.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/unistd.h>
#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_domainset.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_phys.h>
#include <machine/cpu.h>
#if VM_NRESERVLEVEL > 1
#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER + \
PAGE_SHIFT)
#elif VM_NRESERVLEVEL > 0
#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#else
#define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT)
#endif
#define KVA_KSTACK_QUANTUM (1ul << KVA_KSTACK_QUANTUM_SHIFT)
/*
* MPSAFE
*
* WARNING! This code calls vm_map_check_protection() which only checks
* the associated vm_map_entry range. It does not determine whether the
* contents of the memory is actually readable or writable. In most cases
* just checking the vm_map_entry is sufficient within the kernel's address
* space.
*/
bool
kernacc(void *addr, int len, int rw)
{
boolean_t rv;
vm_offset_t saddr, eaddr;
vm_prot_t prot;
KASSERT((rw & ~VM_PROT_ALL) == 0,
("illegal ``rw'' argument to kernacc (%x)\n", rw));
if ((vm_offset_t)addr + len > vm_map_max(kernel_map) ||
(vm_offset_t)addr + len < (vm_offset_t)addr)
return (false);
prot = rw;
saddr = trunc_page((vm_offset_t)addr);
eaddr = round_page((vm_offset_t)addr + len);
vm_map_lock_read(kernel_map);
rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
vm_map_unlock_read(kernel_map);
return (rv == TRUE);
}
/*
* MPSAFE
*
* WARNING! This code calls vm_map_check_protection() which only checks
* the associated vm_map_entry range. It does not determine whether the
* contents of the memory is actually readable or writable. vmapbuf(),
* vm_fault_quick(), or copyin()/copyout()/su*()/fu*() functions should be
* used in conjunction with this call.
*/
bool
useracc(void *addr, int len, int rw)
{
boolean_t rv;
vm_prot_t prot;
vm_map_t map;
KASSERT((rw & ~VM_PROT_ALL) == 0,
("illegal ``rw'' argument to useracc (%x)\n", rw));
prot = rw;
map = &curproc->p_vmspace->vm_map;
if ((vm_offset_t)addr + len > vm_map_max(map) ||
(vm_offset_t)addr + len < (vm_offset_t)addr) {
return (false);
}
vm_map_lock_read(map);
rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), prot);
vm_map_unlock_read(map);
return (rv == TRUE);
}
int
vslock(void *addr, size_t len)
{
vm_offset_t end, last, start;
vm_size_t npages;
int error;
last = (vm_offset_t)addr + len;
start = trunc_page((vm_offset_t)addr);
end = round_page(last);
if (last < (vm_offset_t)addr || end < (vm_offset_t)addr)
return (EINVAL);
npages = atop(end - start);
if (npages > vm_page_max_user_wired)
return (ENOMEM);
error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end,
VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
if (error == KERN_SUCCESS) {
curthread->td_vslock_sz += len;
return (0);
}
/*
* Return EFAULT on error to match copy{in,out}() behaviour
* rather than returning ENOMEM like mlock() would.
*/
return (EFAULT);
}
void
vsunlock(void *addr, size_t len)
{
/* Rely on the parameter sanity checks performed by vslock(). */
MPASS(curthread->td_vslock_sz >= len);
curthread->td_vslock_sz -= len;
(void)vm_map_unwire(&curproc->p_vmspace->vm_map,
trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len),
VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
}
/*
* Pin the page contained within the given object at the given offset. If the
* page is not resident, allocate and load it using the given object's pager.
* Return the pinned page if successful; otherwise, return NULL.
*/
static vm_page_t
vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
{
vm_page_t m;
vm_pindex_t pindex;
pindex = OFF_TO_IDX(offset);
(void)vm_page_grab_valid_unlocked(&m, object, pindex,
VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
return (m);
}
/*
* Return a CPU private mapping to the page at the given offset within the
* given object. The page is pinned before it is mapped.
*/
struct sf_buf *
vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset)
{
vm_page_t m;
m = vm_imgact_hold_page(object, offset);
if (m == NULL)
return (NULL);
sched_pin();
return (sf_buf_alloc(m, SFB_CPUPRIVATE));
}
/*
* Destroy the given CPU private mapping and unpin the page that it mapped.
*/
void
vm_imgact_unmap_page(struct sf_buf *sf)
{
vm_page_t m;
m = sf_buf_page(sf);
sf_buf_free(sf);
sched_unpin();
vm_page_unwire(m, PQ_ACTIVE);
}
void
vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t sz)
{
pmap_sync_icache(map->pmap, va, sz);
}
static vm_object_t kstack_object;
static vm_object_t kstack_alt_object;
static uma_zone_t kstack_cache;
static int kstack_cache_size;
static vmem_t *vmd_kstack_arena[MAXMEMDOM];
static int
sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS)
{
int error, oldsize;
oldsize = kstack_cache_size;
error = sysctl_handle_int(oidp, arg1, arg2, req);
if (error == 0 && req->newptr && oldsize != kstack_cache_size)
uma_zone_set_maxcache(kstack_cache, kstack_cache_size);
return (error);
}
SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size,
CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RW, &kstack_cache_size, 0,
sysctl_kstack_cache_size, "IU", "Maximum number of cached kernel stacks");
/*
* Allocate a virtual address range from a domain kstack arena, following
* the specified NUMA policy.
*/
static vm_offset_t
vm_thread_alloc_kstack_kva(vm_size_t size, int domain)
{
#ifndef __ILP32__
int rv;
vmem_t *arena;
vm_offset_t addr = 0;
size = round_page(size);
/* Allocate from the kernel arena for non-standard kstack sizes. */
if (size != ptoa(kstack_pages + KSTACK_GUARD_PAGES)) {
arena = vm_dom[domain].vmd_kernel_arena;
} else {
arena = vmd_kstack_arena[domain];
}
rv = vmem_alloc(arena, size, M_BESTFIT | M_NOWAIT, &addr);
if (rv == ENOMEM)
return (0);
KASSERT(atop(addr - VM_MIN_KERNEL_ADDRESS) %
(kstack_pages + KSTACK_GUARD_PAGES) == 0,
("%s: allocated kstack KVA not aligned to multiple of kstack size",
__func__));
return (addr);
#else
return (kva_alloc(size));
#endif
}
/*
* Release a region of kernel virtual memory
* allocated from the kstack arena.
*/
static __noinline void
vm_thread_free_kstack_kva(vm_offset_t addr, vm_size_t size, int domain)
{
vmem_t *arena;
size = round_page(size);
#ifdef __ILP32__
arena = kernel_arena;
#else
arena = vmd_kstack_arena[domain];
if (size != ptoa(kstack_pages + KSTACK_GUARD_PAGES)) {
arena = vm_dom[domain].vmd_kernel_arena;
}
#endif
vmem_free(arena, addr, size);
}
static vmem_size_t
vm_thread_kstack_import_quantum(void)
{
#ifndef __ILP32__
/*
* The kstack_quantum is larger than KVA_QUANTUM to account
* for holes induced by guard pages.
*/
return (KVA_KSTACK_QUANTUM * (kstack_pages + KSTACK_GUARD_PAGES));
#else
return (KVA_KSTACK_QUANTUM);
#endif
}
/*
* Import KVA from a parent arena into the kstack arena. Imports must be
* a multiple of kernel stack pages + guard pages in size.
*
* Kstack VA allocations need to be aligned so that the linear KVA pindex
* is divisible by the total number of kstack VA pages. This is necessary to
* make vm_kstack_pindex work properly.
*
* We import a multiple of KVA_KSTACK_QUANTUM-sized region from the parent
* arena. The actual size used by the kstack arena is one kstack smaller to
* allow for the necessary alignment adjustments to be made.
*/
static int
vm_thread_kstack_arena_import(void *arena, vmem_size_t size, int flags,
vmem_addr_t *addrp)
{
int error, rem;
size_t kpages = kstack_pages + KSTACK_GUARD_PAGES;
KASSERT(atop(size) % kpages == 0,
("%s: Size %jd is not a multiple of kstack pages (%d)", __func__,
(intmax_t)size, (int)kpages));
error = vmem_xalloc(arena, vm_thread_kstack_import_quantum(),
KVA_KSTACK_QUANTUM, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags,
addrp);
if (error) {
return (error);
}
rem = atop(*addrp - VM_MIN_KERNEL_ADDRESS) % kpages;
if (rem != 0) {
/* Bump addr to next aligned address */
*addrp = *addrp + (kpages - rem) * PAGE_SIZE;
}
return (0);
}
/*
* Release KVA from a parent arena into the kstack arena. Released imports must
* be a multiple of kernel stack pages + guard pages in size.
*/
static void
vm_thread_kstack_arena_release(void *arena, vmem_addr_t addr, vmem_size_t size)
{
int rem;
size_t kpages __diagused = kstack_pages + KSTACK_GUARD_PAGES;
KASSERT(size % kpages == 0,
("%s: Size %jd is not a multiple of kstack pages (%d)", __func__,
(intmax_t)size, (int)kpages));
KASSERT((addr - VM_MIN_KERNEL_ADDRESS) % kpages == 0,
("%s: Address %p is not properly aligned (%p)", __func__,
(void *)addr, (void *)VM_MIN_KERNEL_ADDRESS));
/*
* If the address is not KVA_KSTACK_QUANTUM-aligned we have to decrement
* it to account for the shift in kva_import_kstack.
*/
rem = addr % KVA_KSTACK_QUANTUM;
if (rem) {
KASSERT(rem <= ptoa(kpages),
("%s: rem > kpages (%d), (%d)", __func__, rem,
(int)kpages));
addr -= rem;
}
vmem_xfree(arena, addr, vm_thread_kstack_import_quantum());
}
/*
* Create the kernel stack for a new thread.
*/
static vm_offset_t
vm_thread_stack_create(struct domainset *ds, int pages)
{
vm_page_t ma[KSTACK_MAX_PAGES];
struct vm_domainset_iter di;
int req = VM_ALLOC_NORMAL;
vm_object_t obj;
vm_offset_t ks;
int domain, i;
obj = vm_thread_kstack_size_to_obj(pages);
if (vm_ndomains > 1)
obj->domain.dr_policy = ds;
vm_domainset_iter_page_init(&di, obj, 0, &domain, &req);
do {
/*
* Get a kernel virtual address for this thread's kstack.
*/
ks = vm_thread_alloc_kstack_kva(ptoa(pages + KSTACK_GUARD_PAGES),
domain);
if (ks == 0)
continue;
ks += ptoa(KSTACK_GUARD_PAGES);
/*
* Allocate physical pages to back the stack.
*/
if (vm_thread_stack_back(ks, ma, pages, req, domain) != 0) {
vm_thread_free_kstack_kva(ks - ptoa(KSTACK_GUARD_PAGES),
ptoa(pages + KSTACK_GUARD_PAGES), domain);
continue;
}
if (KSTACK_GUARD_PAGES != 0) {
pmap_qremove(ks - ptoa(KSTACK_GUARD_PAGES),
KSTACK_GUARD_PAGES);
}
for (i = 0; i < pages; i++)
vm_page_valid(ma[i]);
pmap_qenter(ks, ma, pages);
return (ks);
} while (vm_domainset_iter_page(&di, obj, &domain) == 0);
return (0);
}
static __noinline void
vm_thread_stack_dispose(vm_offset_t ks, int pages)
{
vm_page_t m;
vm_pindex_t pindex;
int i, domain;
vm_object_t obj = vm_thread_kstack_size_to_obj(pages);
pindex = vm_kstack_pindex(ks, pages);
domain = vm_phys_domain(vtophys(ks));
pmap_qremove(ks, pages);
VM_OBJECT_WLOCK(obj);
for (i = 0; i < pages; i++) {
m = vm_page_lookup(obj, pindex + i);
if (m == NULL)
panic("%s: kstack already missing?", __func__);
KASSERT(vm_page_domain(m) == domain,
("%s: page %p domain mismatch, expected %d got %d",
__func__, m, domain, vm_page_domain(m)));
vm_page_xbusy_claim(m);
vm_page_unwire_noq(m);
vm_page_free(m);
}
VM_OBJECT_WUNLOCK(obj);
kasan_mark((void *)ks, ptoa(pages), ptoa(pages), 0);
vm_thread_free_kstack_kva(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
ptoa(pages + KSTACK_GUARD_PAGES), domain);
}
/*
* Allocate the kernel stack for a new thread.
*/
int
vm_thread_new(struct thread *td, int pages)
{
vm_offset_t ks;
u_short ks_domain;
/* Bounds check */
if (pages <= 1)
pages = kstack_pages;
else if (pages > KSTACK_MAX_PAGES)
pages = KSTACK_MAX_PAGES;
ks = 0;
if (pages == kstack_pages && kstack_cache != NULL)
ks = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT);
/*
* Ensure that kstack objects can draw pages from any memory
* domain. Otherwise a local memory shortage can block a process
* swap-in.
*/
if (ks == 0)
ks = vm_thread_stack_create(DOMAINSET_PREF(PCPU_GET(domain)),
pages);
if (ks == 0)
return (0);
ks_domain = vm_phys_domain(vtophys(ks));
KASSERT(ks_domain >= 0 && ks_domain < vm_ndomains,
("%s: invalid domain for kstack %p", __func__, (void *)ks));
td->td_kstack = ks;
td->td_kstack_pages = pages;
td->td_kstack_domain = ks_domain;
return (1);
}
/*
* Dispose of a thread's kernel stack.
*/
void
vm_thread_dispose(struct thread *td)
{
vm_offset_t ks;
int pages;
pages = td->td_kstack_pages;
ks = td->td_kstack;
td->td_kstack = 0;
td->td_kstack_pages = 0;
td->td_kstack_domain = MAXMEMDOM;
if (pages == kstack_pages) {
kasan_mark((void *)ks, 0, ptoa(pages), KASAN_KSTACK_FREED);
uma_zfree(kstack_cache, (void *)ks);
} else {
vm_thread_stack_dispose(ks, pages);
}
}
/*
* Calculate kstack pindex.
*
* Uses a non-identity mapping if guard pages are
* active to avoid pindex holes in the kstack object.
*/
vm_pindex_t
vm_kstack_pindex(vm_offset_t ks, int kpages)
{
vm_pindex_t pindex = atop(ks - VM_MIN_KERNEL_ADDRESS);
#ifdef __ILP32__
return (pindex);
#else
/*
* Return the linear pindex if guard pages aren't active or if we are
* allocating a non-standard kstack size.
*/
if (KSTACK_GUARD_PAGES == 0 || kpages != kstack_pages) {
return (pindex);
}
KASSERT(pindex % (kpages + KSTACK_GUARD_PAGES) >= KSTACK_GUARD_PAGES,
("%s: Attempting to calculate kstack guard page pindex", __func__));
return (pindex -
(pindex / (kpages + KSTACK_GUARD_PAGES) + 1) * KSTACK_GUARD_PAGES);
#endif
}
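/*
 * Worked example of the mapping above (a user-space sketch, not kernel code;
 * the page counts are hypothetical). With kstack_pages = 4 and
 * KSTACK_GUARD_PAGES = 1, every stack occupies 5 linear KVA pages with the
 * guard first, so the first backed page of stack n sits at linear pindex
 * 5n + 1. Subtracting the accumulated guard pages keeps the object pindexes
 * dense: stack n maps to pindexes 4n .. 4n + 3, with no holes for guards.
 */
#include <stdio.h>

#define EX_KSTACK_PAGES	4
#define EX_GUARD_PAGES	1

static unsigned long
ex_dense_pindex(unsigned long linear)
{
	return (linear -
	    (linear / (EX_KSTACK_PAGES + EX_GUARD_PAGES) + 1) * EX_GUARD_PAGES);
}

int
main(void)
{
	for (unsigned long n = 0; n < 4; n++)
		printf("stack %lu: linear pindex %lu -> object pindex %lu\n",
		    n, 5 * n + 1, ex_dense_pindex(5 * n + 1));
	return (0);	/* prints 0, 4, 8, 12 for the four stacks */
}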
/*
* Allocate physical pages, following the specified NUMA policy, to back a
* kernel stack.
*/
int
vm_thread_stack_back(vm_offset_t ks, vm_page_t ma[], int npages, int req_class,
int domain)
{
vm_object_t obj = vm_thread_kstack_size_to_obj(npages);
vm_pindex_t pindex;
vm_page_t m;
int n;
pindex = vm_kstack_pindex(ks, npages);
VM_OBJECT_WLOCK(obj);
for (n = 0; n < npages;) {
m = vm_page_grab(obj, pindex + n,
VM_ALLOC_NOCREAT | VM_ALLOC_WIRED);
if (m == NULL) {
m = vm_page_alloc_domain(obj, pindex + n, domain,
req_class | VM_ALLOC_WIRED);
}
if (m == NULL)
break;
ma[n++] = m;
}
if (n < npages)
goto cleanup;
VM_OBJECT_WUNLOCK(obj);
return (0);
cleanup:
for (int i = 0; i < n; i++) {
m = ma[i];
(void)vm_page_unwire_noq(m);
vm_page_free(m);
}
VM_OBJECT_WUNLOCK(obj);
return (ENOMEM);
}
vm_object_t
vm_thread_kstack_size_to_obj(int npages)
{
return (npages == kstack_pages ? kstack_object : kstack_alt_object);
}
static int
kstack_import(void *arg, void **store, int cnt, int domain, int flags)
{
struct domainset *ds;
int i;
if (domain == UMA_ANYDOMAIN)
ds = DOMAINSET_RR();
else
ds = DOMAINSET_PREF(domain);
for (i = 0; i < cnt; i++) {
store[i] = (void *)vm_thread_stack_create(ds, kstack_pages);
if (store[i] == NULL)
break;
}
return (i);
}
static void
kstack_release(void *arg, void **store, int cnt)
{
vm_offset_t ks;
int i;
for (i = 0; i < cnt; i++) {
ks = (vm_offset_t)store[i];
vm_thread_stack_dispose(ks, kstack_pages);
}
}
static void
kstack_cache_init(void *null)
{
vm_size_t kstack_quantum;
int domain;
kstack_object = vm_object_allocate(OBJT_SWAP,
atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS));
kstack_cache = uma_zcache_create("kstack_cache",
kstack_pages * PAGE_SIZE, NULL, NULL, NULL, NULL,
kstack_import, kstack_release, NULL,
UMA_ZONE_FIRSTTOUCH);
kstack_cache_size = imax(128, mp_ncpus * 4);
uma_zone_set_maxcache(kstack_cache, kstack_cache_size);
kstack_alt_object = vm_object_allocate(OBJT_SWAP,
atop(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS));
kstack_quantum = vm_thread_kstack_import_quantum();
/*
* Reduce size used by the kstack arena to allow for
* alignment adjustments in vm_thread_kstack_arena_import.
*/
kstack_quantum -= (kstack_pages + KSTACK_GUARD_PAGES) * PAGE_SIZE;
/*
* Create the kstack_arena for each domain and set kernel_arena as
* parent.
*/
for (domain = 0; domain < vm_ndomains; domain++) {
vmd_kstack_arena[domain] = vmem_create("kstack arena", 0, 0,
PAGE_SIZE, 0, M_WAITOK);
KASSERT(vmd_kstack_arena[domain] != NULL,
("%s: failed to create domain %d kstack_arena", __func__,
domain));
vmem_set_import(vmd_kstack_arena[domain],
vm_thread_kstack_arena_import,
vm_thread_kstack_arena_release,
vm_dom[domain].vmd_kernel_arena, kstack_quantum);
}
}
SYSINIT(vm_kstacks, SI_SUB_KMEM, SI_ORDER_ANY, kstack_cache_init, NULL);
#ifdef KSTACK_USAGE_PROF
/*
* Track maximum stack used by a thread in kernel.
*/
static int max_kstack_used;
SYSCTL_INT(_debug, OID_AUTO, max_kstack_used, CTLFLAG_RD,
&max_kstack_used, 0,
"Maximum stack depth used by a thread in kernel");
void
intr_prof_stack_use(struct thread *td, struct trapframe *frame)
{
vm_offset_t stack_top;
vm_offset_t current;
int used, prev_used;
/*
* Testing for interrupted kernel mode isn't strictly
* needed. It optimizes the execution, since interrupts from
* usermode will have only the trap frame on the stack.
*/
if (TRAPF_USERMODE(frame))
return;
stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
current = (vm_offset_t)(uintptr_t)&stack_top;
/*
* Try to detect if interrupt is using kernel thread stack.
* Hardware could use a dedicated stack for interrupt handling.
*/
if (stack_top <= current || current < td->td_kstack)
return;
used = stack_top - current;
for (;;) {
prev_used = max_kstack_used;
if (prev_used >= used)
break;
if (atomic_cmpset_int(&max_kstack_used, prev_used, used))
break;
}
}
#endif /* KSTACK_USAGE_PROF */
/*
* Implement fork's actions on an address space.
* Here we arrange for the address space to be copied or referenced,
* allocate a user struct (pcb and kernel stack), then call the
* machine-dependent layer to fill those in and make the new process
* ready to run. The new process is set up so that it returns directly
* to user mode to avoid stack copying and relocation problems.
*/
int
vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2,
struct vmspace *vm2, int flags)
{
struct proc *p1 = td->td_proc;
struct domainset *dset;
int error;
if ((flags & RFPROC) == 0) {
/*
* Divorce the memory, if it is shared, essentially
* this changes shared memory amongst threads, into
* COW locally.
*/
if ((flags & RFMEM) == 0) {
error = vmspace_unshare(p1);
if (error)
return (error);
}
cpu_fork(td, p2, td2, flags);
return (0);
}
if (flags & RFMEM) {
p2->p_vmspace = p1->p_vmspace;
refcount_acquire(&p1->p_vmspace->vm_refcnt);
}
dset = td2->td_domain.dr_policy;
while (vm_page_count_severe_set(&dset->ds_mask)) {
vm_wait_doms(&dset->ds_mask, 0);
}
if ((flags & RFMEM) == 0) {
p2->p_vmspace = vm2;
if (p1->p_vmspace->vm_shm)
shmfork(p1, p2);
}
/*
* cpu_fork will copy and update the pcb, set up the kernel stack,
* and make the child ready to run.
*/
cpu_fork(td, p2, td2, flags);
return (0);
}
/*
* Called after process has been wait(2)'ed upon and is being reaped.
* The idea is to reclaim resources that we could not reclaim while
* the process was still executing.
*/
void
vm_waitproc(struct proc *p)
{
vmspace_exitfree(p); /* and clean-out the vmspace */
}
+/*
+ * This used to kick the thread which faults in threads.
+ */
void
kick_proc0(void)
{
-
- wakeup(&proc0);
}
diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c
index 85708d61d849..b97f6904ab5a 100644
--- a/sys/vm/vm_swapout.c
+++ b/sys/vm/vm_swapout.c
@@ -1,715 +1,595 @@
/*-
* SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU)
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
* Copyright (c) 1994 John S. Dyson
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
* Copyright (c) 2005 Yahoo! Technologies Norway AS
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include <sys/cdefs.h>
#include "opt_kstack_pages.h"
#include "opt_kstack_max_pages.h"
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/mount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
/* the kernel process "vm_daemon" */
static void vm_daemon(void);
static struct proc *vmproc;
static struct kproc_desc vm_kp = {
"vmdaemon",
vm_daemon,
&vmproc
};
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
static int vm_swap_enabled = 1;
static int vm_swap_idle_enabled = 0;
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW,
&vm_swap_enabled, 0,
"Enable entire process swapout");
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW,
&vm_swap_idle_enabled, 0,
"Allow swapout on idle criteria");
/*
* Swap_idle_threshold1 is the guaranteed swapped in time for a process
*/
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW,
&swap_idle_threshold1, 0,
"Guaranteed swapped in time for a process");
/*
* Swap_idle_threshold2 is the time that a process can be idle before
* it will be swapped out, if idle swapping is enabled.
*/
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW,
&swap_idle_threshold2, 0,
"Time before a process will be swapped out");
static int vm_daemon_timeout = 0;
SYSCTL_INT(_vm, OID_AUTO, vmdaemon_timeout, CTLFLAG_RW,
&vm_daemon_timeout, 0,
"Time between vmdaemon runs");
static int vm_daemon_needed;
static struct mtx vm_daemon_mtx;
/* Allow for use by vm_pageout before vm_daemon is initialized. */
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
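/* Number of processes that are currently swapped out. */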
static int swapped_cnt;
static int swap_inprogress; /* Pending swap-ins done outside swapper. */
static int last_swapin;
static void swapclear(struct proc *);
static void vm_swapout_map_deactivate_pages(vm_map_t, long);
static void vm_swapout_object_deactivate(pmap_t, vm_object_t, long);
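/*
* Attempt to unmap and deactivate a single page on behalf of the given
* pmap.  The page is left alone if it is wired, cannot be busied, is not
* mapped by the pmap, or is marked referenced.
*/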
static void
vm_swapout_object_deactivate_page(pmap_t pmap, vm_page_t m, bool unmap)
{
/*
* Ignore unreclaimable wired pages. Repeat the check after busying
* since a busy holder may wire the page.
*/
if (vm_page_wired(m) || !vm_page_tryxbusy(m))
return;
if (vm_page_wired(m) || !pmap_page_exists_quick(pmap, m)) {
vm_page_xunbusy(m);
return;
}
if (!pmap_is_referenced(m)) {
if (!vm_page_active(m))
(void)vm_page_try_remove_all(m);
else if (unmap && vm_page_try_remove_all(m))
vm_page_deactivate(m);
}
vm_page_xunbusy(m);
}
/*
* vm_swapout_object_deactivate
*
* Deactivate pages in the object chain until the pmap's resident
* page count drops to the desired value.
*
* The object and map must be locked.
*/
static void
vm_swapout_object_deactivate(pmap_t pmap, vm_object_t first_object,
long desired)
{
vm_object_t backing_object, object;
vm_page_t m;
bool unmap;
VM_OBJECT_ASSERT_LOCKED(first_object);
if ((first_object->flags & OBJ_FICTITIOUS) != 0)
return;
for (object = first_object;; object = backing_object) {
if (pmap_resident_count(pmap) <= desired)
goto unlock_return;
VM_OBJECT_ASSERT_LOCKED(object);
if ((object->flags & OBJ_UNMANAGED) != 0 ||
blockcount_read(&object->paging_in_progress) > 0)
goto unlock_return;
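/*
* Only permit unmapping of pages that are still on the active queue
* when the object is shadowed by at most one other object.
*/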
unmap = true;
if (object->shadow_count > 1)
unmap = false;
/*
* Scan the object's entire memory queue.
*/
TAILQ_FOREACH(m, &object->memq, listq) {
if (pmap_resident_count(pmap) <= desired)
goto unlock_return;
if (should_yield())
goto unlock_return;
vm_swapout_object_deactivate_page(pmap, m, unmap);
}
if ((backing_object = object->backing_object) == NULL)
goto unlock_return;
VM_OBJECT_RLOCK(backing_object);
if (object != first_object)
VM_OBJECT_RUNLOCK(object);
}
unlock_return:
if (object != first_object)
VM_OBJECT_RUNLOCK(object);
}
/*
* Deactivate some number of pages in a map; try to do it fairly,
* although that is really hard to do.
*/
static void
vm_swapout_map_deactivate_pages(vm_map_t map, long desired)
{
vm_map_entry_t tmpe;
vm_object_t obj, bigobj;
int nothingwired;
if (!vm_map_trylock_read(map))
return;
bigobj = NULL;
nothingwired = TRUE;
/*
* first, search out the biggest object, and try to free pages from
* that.
*/
VM_MAP_ENTRY_FOREACH(tmpe, map) {
if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
obj = tmpe->object.vm_object;
if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) {
if (obj->shadow_count <= 1 &&
(bigobj == NULL ||
bigobj->resident_page_count <
obj->resident_page_count)) {
if (bigobj != NULL)
VM_OBJECT_RUNLOCK(bigobj);
bigobj = obj;
} else
VM_OBJECT_RUNLOCK(obj);
}
}
if (tmpe->wired_count > 0)
nothingwired = FALSE;
}
if (bigobj != NULL) {
vm_swapout_object_deactivate(map->pmap, bigobj, desired);
VM_OBJECT_RUNLOCK(bigobj);
}
/*
* Next, hunt around for other pages to deactivate. We actually
* do this search sort of wrong -- .text first is not the best idea.
*/
VM_MAP_ENTRY_FOREACH(tmpe, map) {
if (pmap_resident_count(vm_map_pmap(map)) <= desired)
break;
if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
obj = tmpe->object.vm_object;
if (obj != NULL) {
VM_OBJECT_RLOCK(obj);
vm_swapout_object_deactivate(map->pmap, obj,
desired);
VM_OBJECT_RUNLOCK(obj);
}
}
}
/*
* Remove all mappings if a process is swapped out; this frees the
* page table pages.
*/
if (desired == 0 && nothingwired) {
pmap_remove(vm_map_pmap(map), vm_map_min(map),
vm_map_max(map));
}
vm_map_unlock_read(map);
}
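/*
* The vm_daemon kernel process.  Periodically scans all processes and,
* for any process whose resident set exceeds its RSS rlimit (or, with
* RACCT, its RSS limit), deactivates pages in its address space to push
* it back toward the limit.
*/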
static void
vm_daemon(void)
{
struct rlimit rsslim;
struct proc *p;
struct thread *td;
struct vmspace *vm;
int breakout, tryagain, attempts;
#ifdef RACCT
uint64_t rsize, ravailable;
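/* With RACCT enabled, default to running the daemon once per second. */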
if (racct_enable && vm_daemon_timeout == 0)
vm_daemon_timeout = hz;
#endif
while (TRUE) {
mtx_lock(&vm_daemon_mtx);
msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep",
vm_daemon_timeout);
mtx_unlock(&vm_daemon_mtx);
/*
* Scan the processes: deactivate pages for any process that exceeds
* its rlimits or is swapped out.
*/
tryagain = 0;
attempts = 0;
again:
attempts++;
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
vm_pindex_t limit, size;
/*
* if this is a system process or if we have already
* looked at this process, skip it.
*/
PROC_LOCK(p);
if (p->p_state != PRS_NORMAL ||
p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
PROC_UNLOCK(p);
continue;
}
/*
* If any thread of the process is in a state other than running,
* runnable, sleeping, or suspended, don't touch the process.
*/
breakout = 0;
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
if (!TD_ON_RUNQ(td) &&
!TD_IS_RUNNING(td) &&
!TD_IS_SLEEPING(td) &&
!TD_IS_SUSPENDED(td)) {
thread_unlock(td);
breakout = 1;
break;
}
thread_unlock(td);
}
if (breakout) {
PROC_UNLOCK(p);
continue;
}
/*
* get a limit
*/
lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
limit = OFF_TO_IDX(
qmin(rsslim.rlim_cur, rsslim.rlim_max));
/*
* Let processes that are swapped out really be swapped out:
* set the limit to nothing (this will force a swap-out).
*/
if ((p->p_flag & P_INMEM) == 0)
limit = 0; /* XXX */
vm = vmspace_acquire_ref(p);
_PHOLD_LITE(p);
PROC_UNLOCK(p);
if (vm == NULL) {
PRELE(p);
continue;
}
sx_sunlock(&allproc_lock);
size = vmspace_resident_count(vm);
if (size >= limit) {
vm_swapout_map_deactivate_pages(
&vm->vm_map, limit);
size = vmspace_resident_count(vm);
}
#ifdef RACCT
if (racct_enable) {
rsize = IDX_TO_OFF(size);
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL)
racct_set(p, RACCT_RSS, rsize);
ravailable = racct_get_available(p, RACCT_RSS);
PROC_UNLOCK(p);
if (rsize > ravailable) {
/*
* Don't be overly aggressive; this
* might be an innocent process,
* and the limit could've been exceeded
* by some memory hog. Don't try
* to deactivate more than 1/4th
* of process' resident set size.
*/
if (attempts <= 8) {
if (ravailable < rsize -
(rsize / 4)) {
ravailable = rsize -
(rsize / 4);
}
}
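/*
* The clamp above ensures that no more than 1/4 of the
* current RSS is targeted for deactivation during the
* first 8 passes over the process.
*/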
vm_swapout_map_deactivate_pages(
&vm->vm_map,
OFF_TO_IDX(ravailable));
/* Update RSS usage after paging out. */
size = vmspace_resident_count(vm);
rsize = IDX_TO_OFF(size);
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL)
racct_set(p, RACCT_RSS, rsize);
PROC_UNLOCK(p);
if (rsize > ravailable)
tryagain = 1;
}
}
#endif
vmspace_free(vm);
sx_slock(&allproc_lock);
PRELE(p);
}
sx_sunlock(&allproc_lock);
if (tryagain != 0 && attempts <= 10) {
maybe_yield();
goto again;
}
}
}
/*
* Bring the kernel stack for a specified thread back in.
*/
static void
vm_thread_swapin(struct thread *td, int oom_alloc)
{
vm_page_t ma[KSTACK_MAX_PAGES];
vm_offset_t kaddr;
vm_object_t obj;
int a, count, i, j, pages, rv __diagused;
kaddr = td->td_kstack;
pages = td->td_kstack_pages;
obj = vm_thread_kstack_size_to_obj(pages);
while (vm_thread_stack_back(kaddr, ma, pages, oom_alloc,
td->td_kstack_domain) == ENOMEM)
;
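/*
* Page in any stack pages that are not already valid, batching runs
* of contiguous invalid pages into a single pager request.
*/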
for (i = 0; i < pages;) {
vm_page_assert_xbusied(ma[i]);
if (vm_page_all_valid(ma[i])) {
i++;
continue;
}
vm_object_pip_add(obj, 1);
for (j = i + 1; j < pages; j++)
if (vm_page_all_valid(ma[j]))
break;
VM_OBJECT_WLOCK(obj);
rv = vm_pager_has_page(obj, ma[i]->pindex, NULL, &a);
VM_OBJECT_WUNLOCK(obj);
KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i]));
count = min(a + 1, j - i);
rv = vm_pager_get_pages(obj, ma + i, count, NULL, NULL);
KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d",
__func__, td->td_proc->p_pid));
vm_object_pip_wakeup(obj);
i += count;
}
pmap_qenter(kaddr, ma, pages);
cpu_thread_swapin(td);
}
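/*
* Bring a swapped-out process back into memory, swapping in the kernel
* stacks of all of its threads and marking the process resident again.
*/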
void
faultin(struct proc *p)
{
struct thread *td;
int oom_alloc;
PROC_LOCK_ASSERT(p, MA_OWNED);
/*
* If another process is swapping in this process,
* just wait until it finishes.
*/
if (p->p_flag & P_SWAPPINGIN) {
while (p->p_flag & P_SWAPPINGIN)
msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0);
return;
}
if ((p->p_flag & P_INMEM) == 0) {
oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM :
VM_ALLOC_NORMAL;
/*
* Don't let another thread swap process p out while we are
* busy swapping it in.
*/
++p->p_lock;
p->p_flag |= P_SWAPPINGIN;
PROC_UNLOCK(p);
sx_xlock(&allproc_lock);
MPASS(swapped_cnt > 0);
swapped_cnt--;
if (curthread != &thread0)
swap_inprogress++;
sx_xunlock(&allproc_lock);
/*
* We hold no lock here because the list of threads
* cannot change while all threads in the process are
* swapped out.
*/
FOREACH_THREAD_IN_PROC(p, td)
vm_thread_swapin(td, oom_alloc);
if (curthread != &thread0) {
sx_xlock(&allproc_lock);
MPASS(swap_inprogress > 0);
swap_inprogress--;
last_swapin = ticks;
sx_xunlock(&allproc_lock);
}
PROC_LOCK(p);
swapclear(p);
p->p_swtick = ticks;
/* Allow other threads to swap p out now. */
wakeup(&p->p_flag);
--p->p_lock;
}
}
-/*
- * This swapin algorithm attempts to swap-in processes only if there
- * is enough space for them. Of course, if a process waits for a long
- * time, it will be swapped in anyway.
- */
-
-static struct proc *
-swapper_selector(bool wkilled_only)
-{
- struct proc *p, *res;
- struct thread *td;
- int ppri, pri, slptime, swtime;
-
- sx_assert(&allproc_lock, SA_SLOCKED);
- if (swapped_cnt == 0)
- return (NULL);
- res = NULL;
- ppri = INT_MIN;
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT |
- P_SWAPPINGIN | P_INMEM)) != 0) {
- PROC_UNLOCK(p);
- continue;
- }
- if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) {
- /*
- * A swapped-out process might have mapped a
- * large portion of the system's pages as
- * anonymous memory. There is no other way to
- * release the memory other than to kill the
- * process, for which we need to swap it in.
- */
- return (p);
- }
- if (wkilled_only) {
- PROC_UNLOCK(p);
- continue;
- }
- swtime = (ticks - p->p_swtick) / hz;
- FOREACH_THREAD_IN_PROC(p, td) {
- /*
- * An otherwise runnable thread of a process
- * swapped out has only the TDI_SWAPPED bit set.
- */
- thread_lock(td);
- if (td->td_inhibitors == TDI_SWAPPED) {
- slptime = (ticks - td->td_slptick) / hz;
- pri = swtime + slptime;
- if ((td->td_flags & TDF_SWAPINREQ) == 0)
- pri -= p->p_nice * 8;
- /*
- * if this thread is higher priority
- * and there is enough space, then select
- * this process instead of the previous
- * selection.
- */
- if (pri > ppri) {
- res = p;
- ppri = pri;
- }
- }
- thread_unlock(td);
- }
- PROC_UNLOCK(p);
- }
-
- if (res != NULL)
- PROC_LOCK(res);
- return (res);
-}
-
-#define SWAPIN_INTERVAL (MAXSLP * hz / 2)
-
-/*
- * Limit swapper to swap in one non-WKILLED process in MAXSLP/2
- * interval, assuming that there is:
- * - at least one domain that is not suffering from a shortage of free memory;
- * - no parallel swap-ins;
- * - no other swap-ins in the current SWAPIN_INTERVAL.
- */
-static bool
-swapper_wkilled_only(void)
-{
-
- return (vm_page_count_min_set(&all_domains) || swap_inprogress > 0 ||
- (u_int)(ticks - last_swapin) < SWAPIN_INTERVAL);
-}
-
-void
-swapper(void)
-{
- struct proc *p;
-
- for (;;) {
- sx_slock(&allproc_lock);
- p = swapper_selector(swapper_wkilled_only());
- sx_sunlock(&allproc_lock);
-
- if (p == NULL) {
- tsleep(&proc0, PVM, "swapin", SWAPIN_INTERVAL);
- } else {
- PROC_LOCK_ASSERT(p, MA_OWNED);
-
- /*
- * Another process may be bringing or may have
- * already brought this process in while we
- * traverse all threads. Or, this process may
- * have exited or even being swapped out
- * again.
- */
- if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM |
- P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) {
- faultin(p);
- }
- PROC_UNLOCK(p);
- }
- }
-}
-
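/*
* Mark a process as resident again after swap-in: clear each thread's
* swapped-out state and make runnable threads schedulable.
*/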
static void
swapclear(struct proc *p)
{
struct thread *td;
PROC_LOCK_ASSERT(p, MA_OWNED);
FOREACH_THREAD_IN_PROC(p, td) {
thread_lock(td);
td->td_flags |= TDF_INMEM;
td->td_flags &= ~TDF_SWAPINREQ;
TD_CLR_SWAPPED(td);
if (TD_CAN_RUN(td)) {
if (setrunnable(td, 0)) {
#ifdef INVARIANTS
/*
* XXX: We just cleared TDI_SWAPPED
* above and set TDF_INMEM, so this
* should never happen.
*/
panic("not waking up swapper");
#endif
}
} else
thread_unlock(td);
}
p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT);
p->p_flag |= P_INMEM;
}
diff --git a/sys/vm/vm_swapout_dummy.c b/sys/vm/vm_swapout_dummy.c
index 0e0a268c8c46..7697a86f9d0b 100644
--- a/sys/vm/vm_swapout_dummy.c
+++ b/sys/vm/vm_swapout_dummy.c
@@ -1,109 +1,101 @@
/*-
* SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU)
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
* Copyright (c) 1994 John S. Dyson
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
* Copyright (c) 2005 Yahoo! Technologies Norway AS
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
* Authors: Avadis Tevanian, Jr., Michael Wayne Young
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_pageout.h>
static int vm_swap_enabled = 0;
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD,
&vm_swap_enabled, 0,
"Enable entire process swapout");
static int vm_swap_idle_enabled = 0;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RD,
&vm_swap_idle_enabled, 0,
"Allow swapout on idle criteria");
void
faultin(struct proc *p)
{
PROC_LOCK_ASSERT(p, MA_OWNED);
if ((p->p_flag & P_INMEM) == 0)
panic("faultin: proc %p swapped out with NO_SWAPPING", p);
}
-
-void
-swapper(void)
-{
-
- for (;;)
- tsleep(&proc0, PVM, "swapin", MAXSLP * hz);
-}
