Page MenuHomeFreeBSD

D15865.diff
No OneTemporary

D15865.diff

Index: sys/arm/arm/pmap-v6.c
===================================================================
--- sys/arm/arm/pmap-v6.c
+++ sys/arm/arm/pmap-v6.c
@@ -6577,7 +6577,7 @@
int npte2 = 0;
int i, j, index;
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
if (p->p_pid != pid || p->p_vmspace == NULL)
continue;
@@ -6605,7 +6605,7 @@
index = 0;
printf("\n");
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (npte2);
}
pte2p = pmap_pte2(pmap, va);
@@ -6632,7 +6632,7 @@
}
}
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (npte2);
}
Index: sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c
+++ sys/cddl/contrib/opensolaris/uts/intel/dtrace/fasttrap_isa.c
@@ -1022,11 +1022,11 @@
mutex_enter(pid_mtx);
#else
pp = p;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
while (pp->p_vmspace == pp->p_pptr->p_vmspace)
pp = pp->p_pptr;
pid = pp->p_pid;
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
pp = NULL;
rm_rlock(&fasttrap_tp_lock, &tracker);
Index: sys/compat/linprocfs/linprocfs.c
===================================================================
--- sys/compat/linprocfs/linprocfs.c
+++ sys/compat/linprocfs/linprocfs.c
@@ -689,8 +689,8 @@
(int)(averunnable.ldavg[2] / averunnable.fscale),
(int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
1, /* number of running tasks */
- nprocs, /* number of tasks */
- lastpid /* the last pid */
+ V_nprocs, /* number of tasks */
+ V_lastpid /* the last pid */
);
return (0);
}
@@ -708,10 +708,10 @@
vm_offset_t startcode, startdata;
getboottime(&boottime);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
fill_kinfo_proc(p, &kp);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
if (p->p_vmspace) {
startcode = (vm_offset_t)p->p_vmspace->vm_taddr;
startdata = (vm_offset_t)p->p_vmspace->vm_daddr;
@@ -787,11 +787,11 @@
struct kinfo_proc kp;
segsz_t lsize;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
fill_kinfo_proc(p, &kp);
PROC_UNLOCK(p);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
/*
* See comments in linprocfs_doprocstatus() regarding the
@@ -825,7 +825,7 @@
l_sigset_t siglist, sigignore, sigcatch;
int i;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */
@@ -864,7 +864,7 @@
}
fill_kinfo_proc(p, &kp);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
sbuf_printf(sb, "Name:\t%s\n", p->p_comm); /* XXX escape */
sbuf_printf(sb, "State:\t%s\n", state);
Index: sys/compat/linux/linux_file.c
===================================================================
--- sys/compat/linux/linux_file.c
+++ sys/compat/linux/linux_file.c
@@ -149,17 +149,17 @@
fdrop(fp, td);
goto done;
}
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
PROC_UNLOCK(p);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
/* XXXPJD: Verify if TIOCSCTTY is allowed. */
(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
td->td_ucred, td);
} else {
PROC_UNLOCK(p);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
}
fdrop(fp, td);
}
Index: sys/compat/linux/linux_fork.c
===================================================================
--- sys/compat/linux/linux_fork.c
+++ sys/compat/linux/linux_fork.c
@@ -233,11 +233,11 @@
* the same as that of the calling process.
*/
if (args->flags & LINUX_CLONE_PARENT) {
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
PROC_LOCK(p2);
proc_reparent(p2, td->td_proc->p_pptr);
PROC_UNLOCK(p2);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
}
#ifdef DEBUG
Index: sys/compat/linux/linux_misc.c
===================================================================
--- sys/compat/linux/linux_misc.c
+++ sys/compat/linux/linux_misc.c
@@ -181,7 +181,7 @@
sysinfo.totalswap = i * PAGE_SIZE;
sysinfo.freeswap = (i - j) * PAGE_SIZE;
- sysinfo.procs = nprocs;
+ sysinfo.procs = V_nprocs;
/* The following are only present in newer Linux kernels. */
sysinfo.totalbig = 0;
Index: sys/compat/linuxkpi/common/src/linux_current.c
===================================================================
--- sys/compat/linuxkpi/common/src/linux_current.c
+++ sys/compat/linuxkpi/common/src/linux_current.c
@@ -226,22 +226,29 @@
static void
linux_current_uninit(void *arg __unused)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
struct task_struct *ts;
struct thread *td;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- FOREACH_THREAD_IN_PROC(p, td) {
- if ((ts = td->td_lkpi_task) != NULL) {
- td->td_lkpi_task = NULL;
- put_task_struct(ts);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if ((ts = td->td_lkpi_task) != NULL) {
+ td->td_lkpi_task = NULL;
+ put_task_struct(ts);
+ }
}
+ PROC_UNLOCK(p);
}
- PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag);
}
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3852,6 +3852,7 @@
kern/kern_tslog.c optional tslog
kern/kern_umtx.c standard
kern/kern_uuid.c standard
+kern/kern_vps.c optional vimage
kern/kern_xxx.c standard
kern/link_elf.c standard
kern/linker_if.m standard
Index: sys/ddb/db_command.c
===================================================================
--- sys/ddb/db_command.c
+++ sys/ddb/db_command.c
@@ -693,11 +693,12 @@
* Find the process in question. allproc_lock is not needed
* since we're in DDB.
*/
- /* sx_slock(&allproc_lock); */
+ /* Operate on current vps instance only. */
+ /* sx_slock(&V_allproc_lock); */
FOREACH_PROC_IN_SYSTEM(p)
if (p->p_pid == pid)
break;
- /* sx_sunlock(&allproc_lock); */
+ /* sx_sunlock(&V_allproc_lock); */
if (p == NULL)
DB_ERROR(("Can't find process with pid %ld\n", (long) pid));
@@ -875,12 +876,26 @@
}
}
+static void
+_db_stack_trace_all_v(bool active_only)
+{
+ VPS_ITERATOR_DECL(vps_iter);
+
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ _db_stack_trace_all(active_only);
+ CURVPS_RESTORE();
+ }
+ /* VPS_LIST_RUNLOCK(); */
+}
+
static void
db_stack_trace_active(db_expr_t dummy, bool dummy2, db_expr_t dummy3,
char *dummy4)
{
- _db_stack_trace_all(true);
+ _db_stack_trace_all_v(true);
}
static void
@@ -888,7 +903,7 @@
char *dummy4)
{
- _db_stack_trace_all(false);
+ _db_stack_trace_all_v(false);
}
/*
Index: sys/ddb/db_expr.c
===================================================================
--- sys/ddb/db_expr.c
+++ sys/ddb/db_expr.c
@@ -58,7 +58,8 @@
if (t == tIDENT) {
if (!db_value_of_name(db_tok_string, valuep) &&
!db_value_of_name_pcpu(db_tok_string, valuep) &&
- !db_value_of_name_vnet(db_tok_string, valuep)) {
+ !db_value_of_name_vnet(db_tok_string, valuep) &&
+ !db_value_of_name_vps(db_tok_string, valuep)) {
db_printf("Symbol '%s' not found\n", db_tok_string);
db_error(NULL);
/*NOTREACHED*/
Index: sys/ddb/db_ps.c
===================================================================
--- sys/ddb/db_ps.c
+++ sys/ddb/db_ps.c
@@ -90,10 +90,11 @@
char state[9];
int np, rflag, sflag, dflag, lflag, wflag;
- np = nprocs;
+ np = V_nprocs;
- if (!LIST_EMPTY(&allproc))
- p = LIST_FIRST(&allproc);
+ /* Operate on current vps instance only. */
+ if (!LIST_EMPTY(&V_allproc))
+ p = LIST_FIRST(&V_allproc);
else
p = &proc0;
@@ -217,8 +218,9 @@
p = LIST_NEXT(p, p_list);
if (p == NULL && np > 0)
- p = LIST_FIRST(&zombproc);
+ p = LIST_FIRST(&V_zombproc);
}
+ db_printf("nprocs = %d, np = %d\n", V_nprocs, np);
}
static void
@@ -397,6 +399,9 @@
db_printf(" last involuntary switch: %d ms ago\n",
1000 * delta / hz);
}
+#ifdef VIMAGE
+ db_printf(" vnet: %p vps: %p\n", td->td_vnet, td->td_vps);
+#endif
}
DB_SHOW_COMMAND(proc, db_show_proc)
@@ -475,6 +480,7 @@
db_findstack_cmd(db_expr_t addr, bool have_addr, db_expr_t dummy3 __unused,
char *dummy4 __unused)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
struct thread *td;
struct kstack_cache_entry *ks_ce;
@@ -487,15 +493,22 @@
return;
}
- FOREACH_PROC_IN_SYSTEM(p) {
- FOREACH_THREAD_IN_PROC(p, td) {
- if (td->td_kstack <= saddr && saddr < td->td_kstack +
- PAGE_SIZE * td->td_kstack_pages) {
- db_printf("Thread %p\n", td);
- return;
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (td->td_kstack <= saddr &&
+ saddr < td->td_kstack +
+ PAGE_SIZE * td->td_kstack_pages) {
+ db_printf("Thread %p\n", td);
+ return;
+ }
}
}
+ CURVPS_RESTORE();
}
+ /* VPS_LIST_RUNLOCK(); */
for (ks_ce = kstack_cache; ks_ce != NULL;
ks_ce = ks_ce->next_ks_entry) {
Index: sys/ddb/db_sym.c
===================================================================
--- sys/ddb/db_sym.c
+++ sys/ddb/db_sym.c
@@ -37,8 +37,10 @@
#include <sys/param.h>
#include <sys/pcpu.h>
+#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/systm.h>
+#include <sys/vps.h>
#include <net/vnet.h>
@@ -69,6 +71,7 @@
#ifdef VIMAGE
static void *db_vnet = NULL;
+static void *db_vps = NULL;
#endif
/*
@@ -168,6 +171,53 @@
return (0);
}
}
+
+/*
+ * Validate the virtual process space pointer used to interpret per-vps global
+ * variable expansion. Right now we don't do much here, really we should
+ * walk the global vps list to check it's an OK pointer.
+ */
+int
+db_var_db_vps(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+
+ switch (op) {
+ case DB_VAR_GET:
+ *valuep = (db_expr_t)db_vps;
+ return (1);
+
+ case DB_VAR_SET:
+ db_vps = *(void **)valuep;
+ return (1);
+
+ default:
+ db_printf("db_var_db_vps: unknown operation\n");
+ return (0);
+ }
+}
+
+/*
+ * Read-only variable reporting the current vps, which is what we use when
+ * db_vps is set to NULL.
+ */
+int
+db_var_curvps(struct db_variable *vp, db_expr_t *valuep, int op)
+{
+
+ switch (op) {
+ case DB_VAR_GET:
+ *valuep = (db_expr_t)curvps;
+ return (1);
+
+ case DB_VAR_SET:
+ db_printf("Read-only variable.\n");
+ return (0);
+
+ default:
+ db_printf("db_var_curvps: unknown operation\n");
+ return (0);
+ }
+}
#endif
/*
@@ -278,6 +328,33 @@
#endif
}
+bool
+db_value_of_name_vps(const char *name, db_expr_t *valuep)
+{
+#ifdef VIMAGE
+ static char tmp[256];
+ db_expr_t value;
+ c_db_sym_t sym;
+ struct vps *vps;
+
+ if (db_vps != NULL)
+ vps = db_vps;
+ else
+ vps = curvps;
+ snprintf(tmp, sizeof(tmp), "vps_entry_%s", name);
+ sym = db_lookup(tmp);
+ if (sym == C_DB_SYM_NULL)
+ return (false);
+ db_symbol_values(sym, &name, &value);
+ if (value < VPS_START || value >= VPS_STOP)
+ return (false);
+ *valuep = (db_expr_t)((uintptr_t)value + vps->vps_data_base);
+ return (true);
+#else
+ return (false);
+#endif
+}
+
/*
* Lookup a symbol.
* If the symbol has a qualifier (e.g., ux:vm_map),
Index: sys/ddb/db_thread.c
===================================================================
--- sys/ddb/db_thread.c
+++ sys/ddb/db_thread.c
@@ -135,11 +135,12 @@
if (td != NULL)
return (td);
if (check_pid) {
+ /* Operate on current vps instance only. */
FOREACH_PROC_IN_SYSTEM(p) {
if (p->p_pid == decaddr)
return (FIRST_THREAD_IN_PROC(p));
}
- LIST_FOREACH(p, &zombproc, p_list) {
+ LIST_FOREACH(p, &V_zombproc, p_list) {
if (p->p_pid == decaddr)
return (FIRST_THREAD_IN_PROC(p));
}
@@ -161,11 +162,12 @@
decaddr = db_hex2dec(addr);
if (decaddr != -1) {
+ /* Operate on current vps instance only. */
FOREACH_PROC_IN_SYSTEM(p) {
if (p->p_pid == decaddr)
return (p);
}
- LIST_FOREACH(p, &zombproc, p_list) {
+ LIST_FOREACH(p, &V_zombproc, p_list) {
if (p->p_pid == decaddr)
return (p);
}
Index: sys/ddb/db_variables.h
===================================================================
--- sys/ddb/db_variables.h
+++ sys/ddb/db_variables.h
@@ -56,8 +56,10 @@
extern db_varfcn_t db_var_curcpu; /* DPCPU default CPU */
extern db_varfcn_t db_var_curvnet; /* Default vnet */
+extern db_varfcn_t db_var_curvps; /* Default vps */
extern db_varfcn_t db_var_db_cpu; /* DPCPU active CPU */
extern db_varfcn_t db_var_db_vnet; /* Active vnet */
+extern db_varfcn_t db_var_db_vps; /* Active vps */
int db_read_variable(struct db_variable *, db_expr_t *);
int db_write_variable(struct db_variable *, db_expr_t);
Index: sys/ddb/db_variables.c
===================================================================
--- sys/ddb/db_variables.c
+++ sys/ddb/db_variables.c
@@ -53,6 +53,8 @@
#ifdef VIMAGE
{ "curvnet", NULL, db_var_curvnet },
{ "db_vnet", NULL, db_var_db_vnet },
+ { "curvps", NULL, db_var_curvps },
+ { "db_vps", NULL, db_var_db_vps },
#endif
};
static struct db_variable *db_evars = db_vars + nitems(db_vars);
Index: sys/ddb/ddb.h
===================================================================
--- sys/ddb/ddb.h
+++ sys/ddb/ddb.h
@@ -229,6 +229,7 @@
bool db_value_of_name(const char *name, db_expr_t *valuep);
bool db_value_of_name_pcpu(const char *name, db_expr_t *valuep);
bool db_value_of_name_vnet(const char *name, db_expr_t *valuep);
+bool db_value_of_name_vps(const char *name, db_expr_t *valuep);
int db_write_bytes(vm_offset_t addr, size_t size, char *data);
void db_command_register(struct command_table *, struct command *);
void db_command_unregister(struct command_table *, struct command *);
Index: sys/dev/filemon/filemon.c
===================================================================
--- sys/dev/filemon/filemon.c
+++ sys/dev/filemon/filemon.c
@@ -210,6 +210,7 @@
static void
filemon_untrack_processes(struct filemon *filemon)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
sx_assert(&filemon->lock, SA_XLOCKED);
@@ -223,18 +224,24 @@
* filemon_event_process_exit() will lock on filemon->lock
* which we hold.
*/
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- /*
- * No PROC_LOCK is needed to compare here since it is
- * guaranteed to not change since we have its filemon
- * locked. Everything that changes this p_filemon will
- * be locked on it.
- */
- if (p->p_filemon == filemon)
- filemon_proc_drop(p);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ /*
+ * No PROC_LOCK is needed to compare here since it is
+ * guaranteed to not change since we have its filemon
+ * locked. Everything that changes this p_filemon will
+ * be locked on it.
+ */
+ if (p->p_filemon == filemon)
+ filemon_proc_drop(p);
+ }
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
/*
* It's possible some references were acquired but will be
Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- sys/dev/hwpmc/hwpmc_mod.c
+++ sys/dev/hwpmc/hwpmc_mod.c
@@ -1203,7 +1203,7 @@
* this PMC.
*/
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
top = p;
@@ -1227,7 +1227,7 @@
(void) pmc_detach_process(top, pm);
done:
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return error;
}
@@ -1312,7 +1312,7 @@
* partially attached proc tree.
*/
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
top = p;
@@ -1333,7 +1333,7 @@
}
done:
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
if (LIST_EMPTY(&pm->pm_targets))
pm->pm_flags &= ~PMC_F_ATTACH_DONE;
@@ -2025,7 +2025,7 @@
PROC_UNLOCK(p);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
top = p;
@@ -2044,7 +2044,7 @@
}
}
done:
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
}
/*
@@ -5364,6 +5364,7 @@
static void
pmc_process_allproc(struct pmc *pm)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct pmc_owner *po;
struct thread *td;
struct proc *p;
@@ -5371,15 +5372,22 @@
po = pm->pm_owner;
if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
return;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- pmclog_process_proccreate(po, p, 0 /* sync */);
- PROC_LOCK(p);
- FOREACH_THREAD_IN_PROC(p, td)
- pmclog_process_threadcreate(po, td, 0 /* sync */);
- PROC_UNLOCK(p);
+
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ pmclog_process_proccreate(po, p, 0 /* sync */);
+ PROC_LOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td)
+ pmclog_process_threadcreate(po, td, 0 /* sync */);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
pmclog_flush(po, 0);
}
Index: sys/fs/devfs/devfs_vnops.c
===================================================================
--- sys/fs/devfs/devfs_vnops.c
+++ sys/fs/devfs/devfs_vnops.c
@@ -596,7 +596,7 @@
if (vp == p->p_session->s_ttyvp) {
PROC_UNLOCK(p);
oldvp = NULL;
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
if (vp == p->p_session->s_ttyvp) {
SESS_LOCK(p->p_session);
VI_LOCK(vp);
@@ -609,7 +609,7 @@
VI_UNLOCK(vp);
SESS_UNLOCK(p->p_session);
}
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
if (oldvp != NULL)
vrele(oldvp);
} else
@@ -813,9 +813,9 @@
if (error == 0 && com == TIOCSCTTY) {
/* Do nothing if reassigning same control tty */
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (td->td_proc->p_session->s_ttyvp == vp) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (0);
}
@@ -826,7 +826,7 @@
td->td_proc->p_session->s_ttydp = cdev2priv(dev);
SESS_UNLOCK(td->td_proc->p_session);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
/* Get rid of reference to old control tty */
if (vpold)
Index: sys/fs/nfs/nfsport.h
===================================================================
--- sys/fs/nfs/nfsport.h
+++ sys/fs/nfs/nfsport.h
@@ -692,8 +692,8 @@
#define NFSUNLOCKMNT(m) mtx_unlock(&((m)->nm_mtx))
#define NFSLOCKREQUEST(r) mtx_lock(&((r)->r_mtx))
#define NFSUNLOCKREQUEST(r) mtx_unlock(&((r)->r_mtx))
-#define NFSPROCLISTLOCK() sx_slock(&allproc_lock)
-#define NFSPROCLISTUNLOCK() sx_sunlock(&allproc_lock)
+#define NFSPROCLISTLOCK() sx_slock(&V_allproc_lock)
+#define NFSPROCLISTUNLOCK() sx_sunlock(&V_allproc_lock)
#define NFSLOCKSOCKREQ(r) mtx_lock(&((r)->nr_mtx))
#define NFSUNLOCKSOCKREQ(r) mtx_unlock(&((r)->nr_mtx))
#define NFSLOCKDS(d) mtx_lock(&((d)->nfsclds_mtx))
Index: sys/fs/pseudofs/pseudofs_vnops.c
===================================================================
--- sys/fs/pseudofs/pseudofs_vnops.c
+++ sys/fs/pseudofs/pseudofs_vnops.c
@@ -705,7 +705,7 @@
{
int visible;
- sx_assert(&allproc_lock, SX_SLOCKED);
+ sx_assert(&V_allproc_lock, SX_SLOCKED);
pfs_assert_owned(pd);
again:
if (*pn == NULL) {
@@ -716,9 +716,14 @@
*pn = (*pn)->pn_next;
}
if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) {
+ /*
+ * Operate on current vps instance only.
+ * We must not iterate over all vps as duplicate process space
+ * would not work at all and leak a lot of information.
+ */
/* next process */
if (*p == NULL)
- *p = LIST_FIRST(&allproc);
+ *p = LIST_FIRST(&V_allproc);
else
*p = LIST_NEXT(*p, p_list);
/* out of processes: next node */
@@ -791,12 +796,12 @@
if (resid == 0)
PFS_RETURN (0);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
pfs_lock(pd);
/* check if the directory is visible to the caller */
if (!pfs_visible(curthread, pd, pid, true, &proc)) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
pfs_unlock(pd);
PFS_RETURN (ENOENT);
}
@@ -810,7 +815,7 @@
if (proc != NULL)
PROC_UNLOCK(proc);
pfs_unlock(pd);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
PFS_RETURN (0);
}
}
@@ -860,7 +865,7 @@
if (proc != NULL)
PROC_UNLOCK(proc);
pfs_unlock(pd);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
i = 0;
STAILQ_FOREACH_SAFE(pfsent, &lst, link, pfsent2) {
if (error == 0)
Index: sys/i386/i386/pmap.c
===================================================================
--- sys/i386/i386/pmap.c
+++ sys/i386/i386/pmap.c
@@ -5799,7 +5799,7 @@
int npte = 0;
int index;
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
if (p->p_pid != pid)
continue;
@@ -5822,7 +5822,7 @@
index = 0;
printf("\n");
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (npte);
}
pte = pmap_pte(pmap, va);
@@ -5847,7 +5847,7 @@
}
}
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (npte);
}
#endif
Index: sys/i386/ibcs2/ibcs2_sysvec.c
===================================================================
--- sys/i386/ibcs2/ibcs2_sysvec.c
+++ sys/i386/ibcs2/ibcs2_sysvec.c
@@ -109,6 +109,7 @@
static int
ibcs2_modevent(module_t mod, int type, void *unused)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p = NULL;
int rval = 0;
@@ -117,14 +118,20 @@
break;
case MOD_UNLOAD:
/* if this was an ELF module we'd use elf_brand_inuse()... */
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_sysent == &ibcs2_svr3_sysvec) {
- rval = EBUSY;
- break;
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_sysent == &ibcs2_svr3_sysvec) {
+ rval = EBUSY;
+ break;
+ }
}
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
break;
default:
rval = EOPNOTSUPP;
Index: sys/kern/imgact_elf.c
===================================================================
--- sys/kern/imgact_elf.c
+++ sys/kern/imgact_elf.c
@@ -238,17 +238,24 @@
int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
int rval = FALSE;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_sysent == entry->sysvec) {
- rval = TRUE;
- break;
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_sysent == entry->sysvec) {
+ rval = TRUE;
+ break;
+ }
}
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
return (rval);
}
@@ -2106,10 +2113,10 @@
KASSERT(*sizep == size, ("invalid size"));
structsize = sizeof(elf_kinfo_proc_t);
sbuf_bcat(sb, &structsize, sizeof(structsize));
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
kern_proc_out(p, sb, ELF_KERN_PROC_MASK);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
}
*sizep = size;
}
Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -56,6 +56,7 @@
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
+#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
@@ -79,6 +80,7 @@
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/cpuset.h>
+#include <sys/vps.h>
#include <machine/cpu.h>
@@ -103,7 +105,18 @@
struct proc proc0;
struct thread0_storage thread0_st __aligned(32);
struct vmspace vmspace0;
-struct proc *initproc;
+VPS_DEFINE(struct proc *, initproc);
+
+VPS_DEFINE(struct proc *, vproc0);
+#ifdef VIMAGE
+/*
+ * Initialize to -2; after kproc_create() our thread will still be
+ * forked from thread0 and in the wrong vps. Once that is fixed it will
+ * see the local copy and not the DEFAULT_VPS one. Make sure we have
+ * a value that we can spin on until this happens.
+ */
+VPS_DEFINE(int, vpsdying) = -2;
+#endif
#ifndef BOOTHOWTO
#define BOOTHOWTO 0
@@ -461,9 +474,8 @@
p->p_osrel = osreldate;
/*
- * Initialize thread and process structures.
+ * Initialize thread structures.
*/
- procinit(); /* set up proc zone */
threadinit(); /* set up UMA zones */
/*
@@ -475,7 +487,8 @@
/*
* Create process 0 (the swapper).
*/
- LIST_INSERT_HEAD(&allproc, p, p_list);
+ V_vproc0 = p;
+ LIST_INSERT_HEAD(&V_allproc, p, p_list);
LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
p->p_pgrp = &pgrp0;
@@ -511,6 +524,9 @@
td->td_cpuset = cpuset_thread0();
td->td_domain.dr_policy = td->td_cpuset->cs_domain;
prison0_init();
+#ifdef VIMAGE
+ td->td_vps = vps0;
+#endif
p->p_peers = 0;
p->p_leader = p;
p->p_reaper = p;
@@ -549,7 +565,7 @@
p->p_sigacts = sigacts_alloc();
/* Initialize signal state for process 0. */
- siginit(&proc0);
+ siginit(V_vproc0);
/* Create the file descriptor table. */
p->p_fd = fdinit(NULL, false);
@@ -614,7 +630,184 @@
racct_add_force(p, RACCT_NPROC, 1);
PROC_UNLOCK(p);
}
-SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL);
+SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_THIRD, proc0_init, NULL);
+
+#ifdef VIMAGE
+static void
+vps_swapper(void *dummy __unused)
+{
+
+ /*
+ * Make sure the surgical changes to V_vproc0 are done before
+ * entering the long-lasting loop. Otherwise we may start
+ * acquiring locks and accessing variables based on the wrong
+ * credential leading to, e.g., panics when trying to unlock a
+ * lock from a different context which may not be locked.
+ * When entering the function our credentials might still point
+ * to the DEFAULT_VPS; see comment for V_vpsdying declaration above.
+ */
+ while (V_vpsdying < 0)
+ pause("wswvps", hz/2);
+
+ /*
+ * Now hand over this thread to swapper.
+ */
+ swapper();
+
+ if (V_vpsdying < 1)
+ panic("%s: swapper curtd %p ended but V_vpsdying %d\n",
+ __func__, curthread, V_vpsdying);
+
+ kproc_exit(0);
+}
+
+static void
+proc0_init_vps(void *dummy __unused)
+{
+ struct ucred *newcred, *savecred;
+ struct thread *td;
+ struct prison *pr;
+ struct uidinfo tmpuinfo;
+ struct loginclass tmplc = {
+ .lc_name = "",
+ };
+ int error;
+
+ /* vps0 is handled normally in p0init. */
+ if (IS_DEFAULT_VPS(curvps))
+ return;
+
+ KASSERT((curvps->vps_pr != NULL && curvps != vps0),
+ ("%s: curvps %p has vps_pr %p or is vps0 %p\n",
+ __func__, curvps, curvps->vps_pr, vps0));
+ KASSERT((curvps == curvps->vps_pr->pr_vps),
+ ("%s: curvps %p != curvps->vps_pr %p ->pr_vps %p\n",
+ __func__, curvps, curvps->vps_pr, curvps->vps_pr->pr_vps));
+
+ /*
+	 * Initialize the non-default VPS version to < 0 so vps_swapper()
+ * will spin once the credential is changed before all other surgery
+ * has happened.
+ */
+ V_vpsdying = -1;
+
+ /*
+ * Default is nprocs = 1 for vps0; need to set it to 0 here as our
+ * "proc0" and with that initproc are forked and not manually constructed.
+ */
+ V_nprocs = 0;
+
+ /*
+ * Set lastpid to -1 so that our swapper gets 0.
+ */
+ V_lastpid = -1;
+
+ error = kproc_create(vps_swapper, NULL, &V_vproc0, 0, 0, "vps%u",
+ curvps->vps_pr->pr_id);
+ if (error)
+ panic("%s: cannot create vps %p swapper: %d\n",
+ __func__, curvps, error);
+
+ /* Create credentials. Copied from proc0. Just using vps_pr. */
+ newcred = crget();
+ newcred->cr_ngroups = 1; /* group 0 */
+ /* A hack to prevent uifind from tripping over NULL pointers. */
+ savecred = curthread->td_ucred;
+ curthread->td_ucred = newcred;
+ tmpuinfo.ui_uid = 1;
+ newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo;
+ newcred->cr_uidinfo = uifind(0);
+ newcred->cr_ruidinfo = uifind(0);
+ newcred->cr_loginclass = &tmplc;
+ newcred->cr_loginclass = loginclass_find("default");
+ /* End hack. creds get properly set later with thread_cow_get_proc */
+ curthread->td_ucred = savecred;
+ PROC_LOCK(V_vproc0);
+ newcred->cr_prison = curvps->vps_pr;
+ prison_hold(newcred->cr_prison);
+ /* The kernel process was accounted to thread0's prison. */
+ prison_proc_hold(newcred->cr_prison);
+ prison_proc_free(savecred->cr_prison);
+ V_vproc0->p_treeflag |= P_TREE_REAPER;
+ savecred = proc_set_cred(V_vproc0, newcred);
+ PROC_UNLOCK(V_vproc0);
+#ifdef AUDIT
+ audit_cred_kproc0(newcred);
+#endif
+#ifdef MAC
+ mac_cred_create_swapper(newcred);
+#endif
+ crfree(savecred);
+
+ PROC_LOCK(V_vproc0);
+ td = FIRST_THREAD_IN_PROC(V_vproc0);
+ thread_cow_get_proc(td, V_vproc0);
+ PROC_UNLOCK(V_vproc0);
+ KASSERT(curvps->vps_pr ==
+ FIRST_THREAD_IN_PROC(V_vproc0)->td_ucred->cr_prison,
+ ("%s:%d: curvps %p vps_pr %p != FTIP(V_vproc0 %p)->td_ucred %p "
+ "cr_prison %p\n", __func__, __LINE__,
+ curvps, curvps->vps_pr, V_vproc0,
+ FIRST_THREAD_IN_PROC(V_vproc0)->td_ucred,
+ FIRST_THREAD_IN_PROC(V_vproc0)->td_ucred->cr_prison));
+ KASSERT(curvps == TD_TO_VPS(FIRST_THREAD_IN_PROC(V_vproc0)),
+ ("%s:%d: curvps %p != TD_TO_VPS(..(V_vproc0 %p)) %p\n",
+ __func__, __LINE__,
+ curvps, V_vproc0, TD_TO_VPS(FIRST_THREAD_IN_PROC(V_vproc0))));
+
+ /* Chroot it. */
+ td = FIRST_THREAD_IN_PROC(V_vproc0);
+ pr = curvps->vps_pr;
+ vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
+ if ((error = change_dir(pr->pr_root, td)) != 0) {
+ printf("%s: td %p change_dir %p failed: %d\n",
+ __func__, td, pr->pr_root, error);
+ goto err;
+ }
+#ifdef MAC
+ if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) {
+ printf("%s: td %p mac_vnode_check_chroot %p failed: %d\n",
+ __func__, td, pr->pr_root, error);
+ goto err;
+ }
+#endif
+ VOP_UNLOCK(pr->pr_root, 0);
+ if ((error = pwd_chroot(td, pr->pr_root))) {
+ printf("%s: td %p pwd_chroot %p failed: %d\n",
+ __func__, td, pr->pr_root, error);
+ goto err;
+ }
+
+ V_vpsdying = 0;
+ return;
+
+err:
+	/* XXX could panic or signal the jail to abort; cannot really stop. */
+ return;
+}
+VPS_SYSINIT(p0init_vps, SI_SUB_INTRINSIC, SI_ORDER_THIRD, proc0_init_vps, NULL);
+
+static void
+proc0_uninit_vps(void *dummy __unused)
+{
+
+ if (IS_DEFAULT_VPS(curvps))
+ return;
+
+ /*
+ * XXX ideally we want to get that state from elsewhere;
+	 * neither prison, nor vps state, .. lends itself though.
+ */
+ V_vpsdying = 1;
+ wakeup(V_vproc0);
+
+ /* Operate on current vps instance only. */
+ while (V_vproc0 != NULL ||
+ !LIST_EMPTY(&V_zombproc) || !LIST_EMPTY(&V_allproc))
+ pause("p0uvps", hz/2);
+}
+VPS_SYSUNINIT(p0uninit_vps, SI_SUB_INTRINSIC, SI_ORDER_THIRD, proc0_uninit_vps, NULL);
+#endif
/* ARGSUSED*/
static void
@@ -628,8 +821,9 @@
/*
* Now we can look at the time, having had a chance to verify the
* time from the filesystem. Pretend that proc0 started now.
+ * Operate on vps0 instance only.
*/
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW) {
@@ -649,7 +843,7 @@
}
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
PCPU_SET(switchtime, cpu_ticks());
PCPU_SET(switchticks, ticks);
@@ -729,7 +923,8 @@
td = curthread;
p = td->td_proc;
- vfs_mountroot();
+ if (IS_DEFAULT_VPS(curvps))
+ vfs_mountroot();
/* Wipe GELI passphrase from the environment. */
kern_unsetenv("kern.geom.eli.passphrase");
@@ -753,8 +948,8 @@
while ((path = strsep(&tmp_init_path, ":")) != NULL) {
pathlen = strlen(path) + 1;
if (bootverbose)
- printf("start_init: trying %s\n", path);
-
+ printf("%s: trying %s\n", __func__, path);
+
/*
* Move out the boot flag argument.
*/
@@ -839,38 +1034,60 @@
struct thread *td;
int error;
+ KASSERT(curvps == FIRST_THREAD_IN_PROC(V_vproc0)->td_vps,
+ ("%s: curvps %p != V_vproc0 %p first td %p td_vps %p\n",
+ __func__, curvps, V_vproc0, FIRST_THREAD_IN_PROC(V_vproc0),
+ FIRST_THREAD_IN_PROC(V_vproc0)->td_vps));
+ KASSERT(curvps == TD_TO_VPS(FIRST_THREAD_IN_PROC(V_vproc0)),
+ ("%s: curvps %p != TD_TO_VPS(..(V_vproc0 %p)) %p\n",
+ __func__, curvps, V_vproc0,
+ TD_TO_VPS(FIRST_THREAD_IN_PROC(V_vproc0))));
+
bzero(&fr, sizeof(fr));
fr.fr_flags = RFFDG | RFPROC | RFSTOPPED;
- fr.fr_procp = &initproc;
- error = fork1(&thread0, &fr);
+ fr.fr_procp = &V_initproc;
+ td = FIRST_THREAD_IN_PROC(V_vproc0);
+ error = fork1(td, &fr);
if (error)
panic("cannot fork init: %d\n", error);
- KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
+ KASSERT(V_initproc->p_pid == 1, ("%s: initproc->p_pid(%d) != 1",
+ __func__, V_initproc->p_pid));
+ KASSERT(curvps == FIRST_THREAD_IN_PROC(V_initproc)->td_vps,
+ ("%s: curvps %p != V_initproc %p first td %p td_vps %p\n",
+ __func__, curvps, V_initproc, FIRST_THREAD_IN_PROC(V_initproc),
+ FIRST_THREAD_IN_PROC(V_initproc)->td_vps));
+
/* divorce init's credentials from the kernel's */
newcred = crget();
- sx_xlock(&proctree_lock);
- PROC_LOCK(initproc);
- initproc->p_flag |= P_SYSTEM | P_INMEM;
- initproc->p_treeflag |= P_TREE_REAPER;
- oldcred = initproc->p_ucred;
+ sx_xlock(&V_proctree_lock);
+ PROC_LOCK(V_initproc);
+ V_initproc->p_flag |= P_SYSTEM | P_INMEM;
+ V_initproc->p_treeflag |= P_TREE_REAPER;
+ oldcred = V_initproc->p_ucred;
crcopy(newcred, oldcred);
+#ifdef VIMAGE
+ /* Swap to the correct prison. */
+ /* XXX is this really needed or was this related to a V_vproc0 bug? */
+ prison_free(newcred->cr_prison);
+ newcred->cr_prison = curvps->vps_pr;
+ prison_hold(newcred->cr_prison);
+#endif
#ifdef MAC
mac_cred_create_init(newcred);
#endif
#ifdef AUDIT
audit_cred_proc1(newcred);
#endif
- proc_set_cred(initproc, newcred);
- td = FIRST_THREAD_IN_PROC(initproc);
- crfree(td->td_ucred);
- td->td_ucred = crhold(initproc->p_ucred);
- PROC_UNLOCK(initproc);
- sx_xunlock(&proctree_lock);
+ /* This will also update cowgen. */
+ proc_set_cred(V_initproc, newcred);
+ PROC_UNLOCK(V_initproc);
+ sx_xunlock(&V_proctree_lock);
crfree(oldcred);
- cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc),
+
+ cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(V_initproc),
start_init, NULL);
}
-SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
+VPS_SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL);
/*
* Make it runnable now.
@@ -880,10 +1097,49 @@
{
struct thread *td;
- td = FIRST_THREAD_IN_PROC(initproc);
+ td = FIRST_THREAD_IN_PROC(V_initproc);
thread_lock(td);
TD_SET_CAN_RUN(td);
sched_add(td, SRQ_BORING);
thread_unlock(td);
}
-SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
+VPS_SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);
+
+#ifdef VIMAGE
+static void
+reapinit(void *ident __unused)
+{
+ struct proc *p, *p2;
+
+ while (V_nprocs > 2) {
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_pid <= 1)
+ continue;
+ PROC_LOCK(p);
+ kern_psignal(p, SIGKILL);
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&V_allproc_lock);
+ pause("reapin1t", hz/2);
+ }
+
+ /* Operate on current vps instance only. */
+ sx_xlock(&V_proctree_lock);
+ LIST_FOREACH_SAFE(p, &V_zombproc, p_list, p2) {
+ PROC_LOCK(p);
+ proc_reap(FIRST_THREAD_IN_PROC(V_vproc0), p, NULL, 0);
+ sx_xlock(&V_proctree_lock);
+ }
+ sx_xunlock(&V_proctree_lock);
+
+ while (V_nprocs > 1)
+ pause("reapinit", hz/2);
+
+ /* Only our "swapper" left. */
+ KASSERT(V_nprocs == 1, ("%s: vps %p V_nprocs %d != 1",
+ __func__, curvps, V_nprocs));
+}
+/* Run very first. */
+VPS_SYSUNINIT(reapinit, SI_SUB_VIMAGE_DONE, SI_ORDER_ANY, reapinit, NULL);
+#endif
Index: sys/kern/kern_acct.c
===================================================================
--- sys/kern/kern_acct.c
+++ sys/kern/kern_acct.c
@@ -378,7 +378,7 @@
* Get process accounting information.
*/
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
/* (1) The terminal from which the process was started */
@@ -386,7 +386,7 @@
acct.ac_tty = tty_udev(p->p_pgrp->pg_session->s_ttyp);
else
acct.ac_tty = NODEV;
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
/* (2) The name of the command that ran */
bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
Index: sys/kern/kern_clock.c
===================================================================
--- sys/kern/kern_clock.c
+++ sys/kern/kern_clock.c
@@ -184,12 +184,78 @@
static int blktime_threshold = 900;
static int sleepfreq = 3;
+static __inline void
+_deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
+{
+ int tticks;
+
+ /*
+ * The thread should be blocked on a turnstile, simply check
+ * if the turnstile channel is in good state.
+ */
+ MPASS(td->td_blocked != NULL);
+
+ tticks = ticks - td->td_blktick;
+ thread_unlock(td);
+ if (tticks > blkticks) {
+
+ /*
+ * Accordingly with provided thresholds, this thread is stuck
+ * for too long on a turnstile.
+ */
+ PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ panic("%s: possible deadlock detected for %p, "
+ "blocked for %d ticks\n", __func__, td, tticks);
+ }
+}
+
+static __inline void
+_deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks)
+{
+ void *wchan;
+ int i, slptype, tryl, tticks;
+
+ /*
+ * Check if the thread is sleeping on a lock, otherwise skip the check.
+ * Drop the thread lock in order to avoid a LOR with the sleepqueue
+ * spinlock.
+ */
+ wchan = td->td_wchan;
+ tticks = ticks - td->td_slptick;
+ thread_unlock(td);
+ slptype = sleepq_type(wchan);
+ if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) &&
+ tticks > slpticks) {
+
+ /*
+ * Accordingly with provided thresholds, this thread is stuck
+ * for too long on a sleepqueue.
+ * However, being on a sleepqueue, we might still check for the
+ * blessed list.
+ */
+ tryl = 0;
+ for (i = 0; blessed[i] != NULL; i++) {
+ if (!strcmp(blessed[i], td->td_wmesg)) {
+ tryl = 1;
+ break;
+ }
+ }
+ if (tryl != 0)
+ return;
+ PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ panic("%s: possible deadlock detected for %p, "
+ "blocked for %d ticks\n", __func__, td, tticks);
+ }
+}
+
static void
deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks)
{
int tticks;
- sx_assert(&allproc_lock, SX_LOCKED);
+ sx_assert(&V_allproc_lock, SX_LOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
@@ -214,7 +280,7 @@
void *wchan;
int i, slptype, tticks;
- sx_assert(&allproc_lock, SX_LOCKED);
+ sx_assert(&V_allproc_lock, SX_LOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
@@ -246,6 +312,7 @@
static void
deadlkres(void)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
struct thread *td;
int blkticks, slpticks, tryl;
@@ -255,41 +322,49 @@
blkticks = blktime_threshold * hz;
slpticks = slptime_threshold * hz;
- /*
- * Avoid to sleep on the sx_lock in order to avoid a
- * possible priority inversion problem leading to
- * starvation.
- * If the lock can't be held after 100 tries, panic.
- */
- if (!sx_try_slock(&allproc_lock)) {
- if (tryl > 100)
- panic("%s: possible deadlock detected "
- "on allproc_lock\n", __func__);
- tryl++;
- pause("allproc", sleepfreq * hz);
- continue;
- }
- tryl = 0;
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state == PRS_NEW) {
- PROC_UNLOCK(p);
- continue;
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+again:
+ CURVPS_SET_QUIET(vps_iter);
+ /*
+ * Avoid to sleep on the sx_lock in order to avoid a
+ * possible priority inversion problem leading to
+ * starvation.
+ * If the lock can't be held after 100 tries, panic.
+ */
+ if (!sx_try_slock(&V_allproc_lock)) {
+ if (tryl > 100)
+ panic("%s: possible deadlock detected "
+ "on allproc_lock\n", __func__);
+ tryl++;
+ CURVPS_RESTORE();
+ pause("allproc", sleepfreq * hz);
+ goto again;
}
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
- if (TD_ON_LOCK(td))
- deadlres_td_on_lock(p, td,
- blkticks);
- else if (TD_IS_SLEEPING(td) &&
- TD_ON_SLEEPQ(td))
- deadlres_td_sleep_q(p, td,
- slpticks);
- thread_unlock(td);
+ tryl = 0;
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ if (TD_ON_LOCK(td))
+ deadlres_td_on_lock(p, td,
+ blkticks);
+ else if (TD_IS_SLEEPING(td) &&
+ TD_ON_SLEEPQ(td))
+ deadlres_td_sleep_q(p, td,
+ slpticks);
+ thread_unlock(td);
+ }
+ PROC_UNLOCK(p);
}
- PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
/* Sleep for sleepfreq seconds. */
pause("-", sleepfreq * hz);
Index: sys/kern/kern_cpuset.c
===================================================================
--- sys/kern/kern_cpuset.c
+++ sys/kern/kern_cpuset.c
@@ -510,24 +510,32 @@
static void
domainset_notify(void)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct thread *td;
struct proc *p;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state == PRS_NEW) {
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ td->td_domain.dr_policy =
+ td->td_cpuset->cs_domain;
+ thread_unlock(td);
+ }
PROC_UNLOCK(p);
- continue;
}
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
- td->td_domain.dr_policy = td->td_cpuset->cs_domain;
- thread_unlock(td);
- }
- PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
kernel_object->domain.dr_policy = cpuset_kernel->cs_domain;
}
Index: sys/kern/kern_descrip.c
===================================================================
--- sys/kern/kern_descrip.c
+++ sys/kern/kern_descrip.c
@@ -1063,7 +1063,7 @@
sigio->sio_ucred = crhold(curthread->td_ucred);
sigio->sio_myref = sigiop;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (pgid > 0) {
proc = pfind(pgid);
if (proc == NULL) {
@@ -1131,14 +1131,14 @@
sigio->sio_pgrp = pgrp;
PGRP_UNLOCK(pgrp);
}
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
SIGIO_LOCK();
*sigiop = sigio;
SIGIO_UNLOCK();
return (0);
fail:
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
crfree(sigio->sio_ucred);
free(sigio, M_SIGIO);
return (ret);
@@ -3190,6 +3190,7 @@
void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct filedesc *fdp;
struct prison *pr;
struct proc *p;
@@ -3198,33 +3199,40 @@
if (vrefcnt(olddp) == 1)
return;
nrele = 0;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- fdp = fdhold(p);
- PROC_UNLOCK(p);
- if (fdp == NULL)
- continue;
- FILEDESC_XLOCK(fdp);
- if (fdp->fd_cdir == olddp) {
- vrefact(newdp);
- fdp->fd_cdir = newdp;
- nrele++;
- }
- if (fdp->fd_rdir == olddp) {
- vrefact(newdp);
- fdp->fd_rdir = newdp;
- nrele++;
- }
- if (fdp->fd_jdir == olddp) {
- vrefact(newdp);
- fdp->fd_jdir = newdp;
- nrele++;
+
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ fdp = fdhold(p);
+ PROC_UNLOCK(p);
+ if (fdp == NULL)
+ continue;
+ FILEDESC_XLOCK(fdp);
+ if (fdp->fd_cdir == olddp) {
+ vrefact(newdp);
+ fdp->fd_cdir = newdp;
+ nrele++;
+ }
+ if (fdp->fd_rdir == olddp) {
+ vrefact(newdp);
+ fdp->fd_rdir = newdp;
+ nrele++;
+ }
+ if (fdp->fd_jdir == olddp) {
+ vrefact(newdp);
+ fdp->fd_jdir = newdp;
+ nrele++;
+ }
+ FILEDESC_XUNLOCK(fdp);
+ fddrop(fdp);
}
- FILEDESC_XUNLOCK(fdp);
- fddrop(fdp);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
if (rootvnode == olddp) {
vrefact(newdp);
rootvnode = newdp;
@@ -3307,6 +3315,7 @@
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct xfile xf;
struct filedesc *fdp;
struct file *fp;
@@ -3318,68 +3327,82 @@
return (error);
if (req->oldptr == NULL) {
n = 0;
- sx_slock(&allproc_lock);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ fdp = fdhold(p);
+ PROC_UNLOCK(p);
+ if (fdp == NULL)
+ continue;
+ /* overestimates sparse tables. */
+ if (fdp->fd_lastfile > 0)
+ n += fdp->fd_lastfile;
+ fddrop(fdp);
+ }
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
+ }
+ VPS_LIST_RUNLOCK();
+ return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
+ }
+ error = 0;
+ bzero(&xf, sizeof(xf));
+ xf.xf_size = sizeof(xf);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW) {
PROC_UNLOCK(p);
continue;
}
+ if (p_cansee(req->td, p) != 0) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ xf.xf_pid = p->p_pid;
+ xf.xf_uid = p->p_ucred->cr_uid;
fdp = fdhold(p);
PROC_UNLOCK(p);
if (fdp == NULL)
continue;
- /* overestimates sparse tables. */
- if (fdp->fd_lastfile > 0)
- n += fdp->fd_lastfile;
+ FILEDESC_SLOCK(fdp);
+ for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) {
+ if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
+ continue;
+ xf.xf_fd = n;
+ xf.xf_file = (kvaddr_t)(uintptr_t)fp;
+ xf.xf_data = (kvaddr_t)(uintptr_t)fp->f_data;
+ xf.xf_vnode = (kvaddr_t)(uintptr_t)fp->f_vnode;
+ xf.xf_type = (kvaddr_t)(uintptr_t)fp->f_type;
+ xf.xf_count = fp->f_count;
+ xf.xf_msgcount = 0;
+ xf.xf_offset = foffset_get(fp);
+ xf.xf_flag = fp->f_flag;
+ error = SYSCTL_OUT(req, &xf, sizeof(xf));
+ if (error)
+ break;
+ }
+ FILEDESC_SUNLOCK(fdp);
fddrop(fdp);
- }
- sx_sunlock(&allproc_lock);
- return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
- }
- error = 0;
- bzero(&xf, sizeof(xf));
- xf.xf_size = sizeof(xf);
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state == PRS_NEW) {
- PROC_UNLOCK(p);
- continue;
- }
- if (p_cansee(req->td, p) != 0) {
- PROC_UNLOCK(p);
- continue;
- }
- xf.xf_pid = p->p_pid;
- xf.xf_uid = p->p_ucred->cr_uid;
- fdp = fdhold(p);
- PROC_UNLOCK(p);
- if (fdp == NULL)
- continue;
- FILEDESC_SLOCK(fdp);
- for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) {
- if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
- continue;
- xf.xf_fd = n;
- xf.xf_file = (kvaddr_t)(uintptr_t)fp;
- xf.xf_data = (kvaddr_t)(uintptr_t)fp->f_data;
- xf.xf_vnode = (kvaddr_t)(uintptr_t)fp->f_vnode;
- xf.xf_type = (kvaddr_t)(uintptr_t)fp->f_type;
- xf.xf_count = fp->f_count;
- xf.xf_msgcount = 0;
- xf.xf_offset = foffset_get(fp);
- xf.xf_flag = fp->f_flag;
- error = SYSCTL_OUT(req, &xf, sizeof(xf));
if (error)
break;
}
- FILEDESC_SUNLOCK(fdp);
- fddrop(fdp);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
if (error)
break;
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
return (error);
}
@@ -3930,21 +3953,28 @@
static struct proc *
file_to_first_proc(struct file *fp)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct filedesc *fdp;
struct proc *p;
int n;
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_state == PRS_NEW)
- continue;
- fdp = p->p_fd;
- if (fdp == NULL)
- continue;
- for (n = 0; n <= fdp->fd_lastfile; n++) {
- if (fp == fdp->fd_ofiles[n].fde_file)
- return (p);
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ fdp = p->p_fd;
+ if (fdp == NULL)
+ continue;
+ for (n = 0; n <= fdp->fd_lastfile; n++) {
+ if (fp == fdp->fd_ofiles[n].fde_file)
+ return (p);
+ }
}
+ CURVPS_RESTORE();
}
+ /* VPS_LIST_RUNLOCK(); */
return (NULL);
}
@@ -3982,6 +4012,7 @@
DB_SHOW_COMMAND(files, db_show_files)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct filedesc *fdp;
struct file *fp;
struct proc *p;
@@ -3989,18 +4020,24 @@
int n;
header = 1;
- FOREACH_PROC_IN_SYSTEM(p) {
- if (p->p_state == PRS_NEW)
- continue;
- if ((fdp = p->p_fd) == NULL)
- continue;
- for (n = 0; n <= fdp->fd_lastfile; ++n) {
- if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (p->p_state == PRS_NEW)
+ continue;
+ if ((fdp = p->p_fd) == NULL)
continue;
- db_print_file(fp, header);
- header = 0;
+ for (n = 0; n <= fdp->fd_lastfile; ++n) {
+ if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
+ continue;
+ db_print_file(fp, header);
+ header = 0;
+ }
}
+ CURVPS_RESTORE();
}
+ /* VPS_LIST_RUNLOCK(); */
}
#endif
Index: sys/kern/kern_exit.c
===================================================================
--- sys/kern/kern_exit.c
+++ sys/kern/kern_exit.c
@@ -96,6 +96,11 @@
SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE1(proc, , , exit, "int");
+#ifdef VIMAGE
+VPS_DECLARE(int, vrebooting); /* kern_reboot() has been called. */
+#define V_vrebooting VPS(vrebooting)
+#endif
+
/* Hook for NFS teardown procedure. */
void (*nlminfo_release_p)(struct proc *p);
@@ -106,13 +111,13 @@
{
struct proc *p, *parent;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
if ((child->p_treeflag & P_TREE_ORPHANED) == 0) {
if (child->p_oppid == 0 ||
child->p_pptr->p_pid == child->p_oppid)
parent = child->p_pptr;
else
- parent = initproc;
+ parent = V_initproc;
return (parent);
}
for (p = child; (p->p_treeflag & P_TREE_FIRST_ORPHAN) == 0;) {
@@ -132,10 +137,16 @@
{
struct proc *p1, *p2, *ptmp;
- sx_assert(&proctree_lock, SX_LOCKED);
- KASSERT(p != initproc, ("reaper_abandon_children for initproc"));
- if ((p->p_treeflag & P_TREE_REAPER) == 0)
+ sx_assert(&V_proctree_lock, SX_LOCKED);
+ /* init inside a vps may die on prison_remove. */
+ KASSERT(!IS_DEFAULT_VPS(curvps) || p != V_initproc,
+ ("%s: for initproc %p", __func__, p));
+ if ((p->p_treeflag & P_TREE_REAPER) == 0) {
+ KASSERT((p != V_initproc && p->p_pid != 1 && p->p_pid != 0),
+ ("%s:%d curvps %p p %p pid %d p_treeflag %#x",
+ __func__, __LINE__, curvps, p, p->p_pid, p->p_treeflag));
return;
+ }
p1 = p->p_reaper;
LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) {
LIST_REMOVE(p2, p_reapsibling);
@@ -148,7 +159,8 @@
PROC_UNLOCK(p2);
}
}
- KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty"));
+ KASSERT(LIST_EMPTY(&p->p_reaplist),
+ ("%s: p %p p_reaplist not empty", __func__, p));
p->p_treeflag &= ~P_TREE_REAPER;
}
@@ -157,7 +169,7 @@
{
struct proc *p1;
- sx_assert(&proctree_lock, SA_XLOCKED);
+ sx_assert(&V_proctree_lock, SA_XLOCKED);
if ((p->p_treeflag & P_TREE_ORPHANED) == 0)
return;
if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) {
@@ -203,9 +215,19 @@
* work around an unsolved stack overflow seen very late during
* shutdown on sparc64 when the gmirror worker process exists.
*/
- if (p == initproc && rebooting == 0) {
+ if (p == V_initproc && (rebooting == 0
+#ifdef VIMAGE
+ || V_vrebooting
+#endif
+ )) {
printf("init died (signal %d, exit %d)\n", signo, rval);
- panic("Going nowhere without my init!");
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VPS(TD_TO_VPS(td)))
+ /* XXX-BZ make this jail go away. */ ;
+ else
+#endif
+ panic("%s: Going nowhere without my init! td %p",
+ __func__, td);
}
/*
@@ -313,7 +335,7 @@
/* Are we a task leader with peers? */
if (p->p_peers != NULL && p == p->p_leader) {
- mtx_lock(&ppeers_lock);
+ mtx_lock(&V_ppeers_lock);
q = p->p_peers;
while (q != NULL) {
PROC_LOCK(q);
@@ -322,8 +344,8 @@
q = q->p_peers;
}
while (p->p_peers != NULL)
- msleep(p, &ppeers_lock, PWAIT, "exit1", 0);
- mtx_unlock(&ppeers_lock);
+ msleep(p, &V_ppeers_lock, PWAIT, "exit1", 0);
+ mtx_unlock(&V_ppeers_lock);
}
/*
@@ -388,7 +410,7 @@
* Remove ourself from our leader's peer list and wake our leader.
*/
if (p->p_leader->p_peers != NULL) {
- mtx_lock(&ppeers_lock);
+ mtx_lock(&V_ppeers_lock);
if (p->p_leader->p_peers != NULL) {
q = p->p_leader;
while (q->p_peers != p)
@@ -396,7 +418,7 @@
q->p_peers = p->p_peers;
wakeup(p->p_leader);
}
- mtx_unlock(&ppeers_lock);
+ mtx_unlock(&V_ppeers_lock);
}
vmspace_exit(td);
@@ -432,16 +454,17 @@
WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid);
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
/*
* Remove proc from allproc queue and pidhash chain.
* Place onto zombproc. Unlink from parent's child list.
*/
- sx_xlock(&allproc_lock);
+ /* Operate on current vps instance only. */
+ sx_xlock(&V_allproc_lock);
LIST_REMOVE(p, p_list);
- LIST_INSERT_HEAD(&zombproc, p, p_list);
+ LIST_INSERT_HEAD(&V_zombproc, p, p_list);
LIST_REMOVE(p, p_hash);
- sx_xunlock(&allproc_lock);
+ sx_xunlock(&V_allproc_lock);
/*
* Reparent all children processes:
@@ -602,7 +625,7 @@
} else
mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx);
- if (p->p_pptr == p->p_reaper || p->p_pptr == initproc) {
+ if (p->p_pptr == p->p_reaper || p->p_pptr == V_initproc) {
signal_parent = 1;
} else if (p->p_sigparent != 0) {
if (p->p_sigparent == SIGCHLD) {
@@ -613,7 +636,7 @@
}
} else
PROC_LOCK(p->p_pptr);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
if (signal_parent == 1) {
childproc_exited(p);
@@ -827,9 +850,9 @@
{
struct proc *q, *t;
- sx_assert(&proctree_lock, SA_XLOCKED);
+ sx_assert(&V_proctree_lock, SA_XLOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
- KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE"));
+ KASSERT(p->p_state == PRS_ZOMBIE, ("%s: !PRS_ZOMBIE", __func__));
mtx_spin_wait_unlocked(&p->p_slock);
@@ -843,7 +866,7 @@
* release the proc struct just yet.
*/
PROC_UNLOCK(p);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return;
}
@@ -870,7 +893,7 @@
wakeup(t);
cv_broadcast(&p->p_pwait);
PROC_UNLOCK(t);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return;
}
p->p_oppid = 0;
@@ -880,9 +903,9 @@
* Remove other references to this process to ensure we have an
* exclusive reference.
*/
- sx_xlock(&allproc_lock);
+ sx_xlock(&V_allproc_lock);
LIST_REMOVE(p, p_list); /* off zombproc */
- sx_xunlock(&allproc_lock);
+ sx_xunlock(&V_allproc_lock);
LIST_REMOVE(p, p_sibling);
reaper_abandon_children(p, true);
LIST_REMOVE(p, p_reapsibling);
@@ -892,7 +915,7 @@
leavepgrp(p);
if (p->p_procdesc != NULL)
procdesc_reap(p);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
PROC_LOCK(p);
knlist_detach(p->p_klist);
@@ -953,9 +976,9 @@
#endif
KASSERT(FIRST_THREAD_IN_PROC(p),
- ("proc_reap: no residual thread!"));
+ ("%s: no residual thread!", __func__));
uma_zfree(proc_zone, p);
- atomic_add_int(&nprocs, -1);
+ atomic_add_int(&V_nprocs, -1);
}
static int
@@ -965,7 +988,7 @@
{
struct rusage *rup;
- sx_assert(&proctree_lock, SA_XLOCKED);
+ sx_assert(&V_proctree_lock, SA_XLOCKED);
PROC_LOCK(p);
@@ -1156,7 +1179,7 @@
bool cont;
PROC_LOCK_ASSERT(p, MA_OWNED);
- sx_assert(&proctree_lock, SA_XLOCKED);
+ sx_assert(&V_proctree_lock, SA_XLOCKED);
MPASS(si_code == CLD_TRAPPED || si_code == CLD_STOPPED ||
si_code == CLD_CONTINUED);
@@ -1170,7 +1193,7 @@
sigqueue_take(p->p_ksi);
PROC_UNLOCK(td->td_proc);
}
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
if (siginfo != NULL) {
siginfo->si_code = si_code;
siginfo->si_status = cont ? SIGCONT : p->p_xsig;
@@ -1223,7 +1246,7 @@
q->p_flag &= ~P_STATCHILD;
PROC_UNLOCK(q);
}
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
loop_locked:
nfound = 0;
LIST_FOREACH(p, &q->p_children, p_sibling) {
@@ -1307,11 +1330,11 @@
}
}
if (nfound == 0) {
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (ECHILD);
}
if (options & WNOHANG) {
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
td->td_retval[0] = 0;
return (0);
}
@@ -1321,7 +1344,7 @@
PROC_UNLOCK(q);
goto loop_locked;
}
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
error = msleep(q, &q->p_mtx, PWAIT | PCATCH | PDROP, "wait", 0);
if (error)
return (error);
@@ -1336,7 +1359,7 @@
proc_reparent(struct proc *child, struct proc *parent)
{
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
PROC_LOCK_ASSERT(child, MA_OWNED);
if (child->p_pptr == parent)
return;
Index: sys/kern/kern_fork.c
===================================================================
--- sys/kern/kern_fork.c
+++ sys/kern/kern_fork.c
@@ -74,6 +74,7 @@
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/signalvar.h>
+#include <sys/vps.h>
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
@@ -184,10 +185,10 @@
return (error);
}
-int nprocs = 1; /* process 0 */
-int lastpid = 0;
-SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
- "Last used PID");
+VPS_DEFINE(int, nprocs) = 1; /* process 0 */
+VPS_DEFINE(int, lastpid) = 0;
+SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD|CTLFLAG_VPS,
+ &VPS_NAME(lastpid), 0, "Last used PID");
/*
* Random component to lastpid generation. We mix in a random factor to make
@@ -197,7 +198,8 @@
* modulus that is too big causes a LOT more process table scans and slows
* down fork processing as the pidchecked caching is defeated.
*/
-static int randompid = 0;
+static VPS_DEFINE(int, randompid) = 0;
+#define V_randompid VPS(randompid)
static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
@@ -207,44 +209,46 @@
error = sysctl_wire_old_buffer(req, sizeof(int));
if (error != 0)
return(error);
- sx_xlock(&allproc_lock);
- pid = randompid;
+ sx_xlock(&V_allproc_lock);
+ pid = V_randompid;
error = sysctl_handle_int(oidp, &pid, 0, req);
if (error == 0 && req->newptr != NULL) {
if (pid == 0)
- randompid = 0;
+ V_randompid = 0;
else if (pid == 1)
/* generate a random PID modulus between 100 and 1123 */
- randompid = 100 + arc4random() % 1024;
+ V_randompid = 100 + arc4random() % 1024;
else if (pid < 0 || pid > pid_max - 100)
/* out of range */
- randompid = pid_max - 100;
+ V_randompid = pid_max - 100;
else if (pid < 100)
/* Make it reasonable */
- randompid = 100;
+ V_randompid = 100;
else
- randompid = pid;
+ V_randompid = pid;
}
- sx_xunlock(&allproc_lock);
+ sx_xunlock(&V_allproc_lock);
return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
0, 0, sysctl_kern_randompid, "I", "Random PID modulus. Special values: 0: disable, 1: choose random value");
-static int
+static VPS_DEFINE(int, pidchecked) = 0;
+#define V_pidchecked VPS(pidchecked)
+
+int
fork_findpid(int flags)
{
struct proc *p;
int trypid;
- static int pidchecked = 0;
/*
* Requires allproc_lock in order to iterate over the list
* of processes, and proctree_lock to access p_pgrp.
*/
- sx_assert(&allproc_lock, SX_LOCKED);
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_allproc_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
/*
* Find an unused process ID. We remember a range of unused IDs
@@ -253,13 +257,13 @@
* If RFHIGHPID is set (used during system boot), do not allocate
* low-numbered pids.
*/
- trypid = lastpid + 1;
+ trypid = V_lastpid + 1;
if (flags & RFHIGHPID) {
if (trypid < 10)
trypid = 10;
} else {
- if (randompid)
- trypid += arc4random() % randompid;
+ if (V_randompid)
+ trypid += arc4random() % V_randompid;
}
retry:
/*
@@ -271,12 +275,12 @@
trypid = trypid % pid_max;
if (trypid < 100)
trypid += 100;
- pidchecked = 0;
+ V_pidchecked = 0;
}
- if (trypid >= pidchecked) {
+ if (trypid >= V_pidchecked) {
int doingzomb = 0;
- pidchecked = PID_MAX;
+ V_pidchecked = PID_MAX;
/*
* Scan the active and zombie procs to check whether this pid
* is in use. Remember the lowest pid that's greater
@@ -291,7 +295,8 @@
* reserved pids is limited by process limit times
* two.
*/
- p = LIST_FIRST(&allproc);
+ /* Operate on current vps instance only. */
+ p = LIST_FIRST(&V_allproc);
again:
for (; p != NULL; p = LIST_NEXT(p, p_list)) {
while (p->p_pid == trypid ||
@@ -301,24 +306,25 @@
(p->p_session != NULL &&
p->p_session->s_sid == trypid)))) {
trypid++;
- if (trypid >= pidchecked)
+ if (trypid >= V_pidchecked)
goto retry;
}
- if (p->p_pid > trypid && pidchecked > p->p_pid)
- pidchecked = p->p_pid;
+ if (p->p_pid > trypid && V_pidchecked > p->p_pid)
+ V_pidchecked = p->p_pid;
if (p->p_pgrp != NULL) {
if (p->p_pgrp->pg_id > trypid &&
- pidchecked > p->p_pgrp->pg_id)
- pidchecked = p->p_pgrp->pg_id;
+ V_pidchecked > p->p_pgrp->pg_id)
+ V_pidchecked = p->p_pgrp->pg_id;
if (p->p_session != NULL &&
p->p_session->s_sid > trypid &&
- pidchecked > p->p_session->s_sid)
- pidchecked = p->p_session->s_sid;
+ V_pidchecked > p->p_session->s_sid)
+ V_pidchecked = p->p_session->s_sid;
}
}
if (!doingzomb) {
+ /* Operate on current vps instance only. */
doingzomb = 1;
- p = LIST_FIRST(&zombproc);
+ p = LIST_FIRST(&V_zombproc);
goto again;
}
}
@@ -327,9 +333,9 @@
* RFHIGHPID does not mess with the lastpid counter during boot.
*/
if (flags & RFHIGHPID)
- pidchecked = 0;
+ V_pidchecked = 0;
else
- lastpid = trypid;
+ V_lastpid = trypid;
return (trypid);
}
@@ -394,8 +400,8 @@
struct filedesc_to_leader *fdtol;
struct sigacts *newsigacts;
- sx_assert(&proctree_lock, SX_LOCKED);
- sx_assert(&allproc_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
+ sx_assert(&V_allproc_lock, SX_XLOCKED);
p1 = td->td_proc;
@@ -404,14 +410,14 @@
p2->p_state = PRS_NEW; /* protect against others */
p2->p_pid = trypid;
AUDIT_ARG_PID(p2->p_pid);
- LIST_INSERT_HEAD(&allproc, p2, p_list);
+ LIST_INSERT_HEAD(&V_allproc, p2, p_list);
allproc_gen++;
LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
PROC_LOCK(p2);
PROC_LOCK(p1);
- sx_xunlock(&allproc_lock);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_allproc_lock);
+ sx_xunlock(&V_proctree_lock);
bcopy(&p1->p_startcopy, &p2->p_startcopy,
__rangeof(struct proc, p_startcopy, p_endcopy));
@@ -490,6 +496,7 @@
td2->td_lend_user_pri = PRI_MAX;
#ifdef VIMAGE
+ td2->td_vps = TD_TO_VPS(td);
td2->td_vnet = NULL;
td2->td_vnet_lpush = NULL;
#endif
@@ -554,11 +561,11 @@
* Set up linkage for kernel based threading.
*/
if ((fr->fr_flags & RFTHREAD) != 0) {
- mtx_lock(&ppeers_lock);
+ mtx_lock(&V_ppeers_lock);
p2->p_peers = p1->p_peers;
p1->p_peers = p2;
p2->p_leader = p1->p_leader;
- mtx_unlock(&ppeers_lock);
+ mtx_unlock(&V_ppeers_lock);
PROC_LOCK(p1->p_leader);
if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
PROC_UNLOCK(p1->p_leader);
@@ -585,7 +592,7 @@
p2->p_leader = p2;
}
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
PGRP_LOCK(p1->p_pgrp);
PROC_LOCK(p2);
PROC_LOCK(p1);
@@ -648,7 +655,7 @@
LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
if (p2->p_reaper == p1)
p2->p_reapsubtree = p2->p_pid;
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
/* Inform accounting that we have forked. */
p2->p_acflag = AFORK;
@@ -751,7 +758,7 @@
* if being set atm.
*/
if ((p1->p_ptevents & PTRACE_FORK) != 0) {
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
PROC_LOCK(p2);
/*
@@ -777,7 +784,7 @@
proc_reparent(p2, p1->p_pptr);
}
PROC_UNLOCK(p2);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
}
if ((fr->fr_flags & RFSTOPPED) == 0) {
@@ -801,6 +808,11 @@
PROC_UNLOCK(p2);
}
+static VPS_DEFINE(int, curfail);
+#define V_curfail VPS(curfail)
+static VPS_DEFINE(struct timeval, lastfail);
+#define V_lastfail VPS(lastfail)
+
int
fork1(struct thread *td, struct fork_req *fr)
{
@@ -810,8 +822,6 @@
struct file *fp_procdesc;
vm_ooffset_t mem_charged;
int error, nprocs_new, ok;
- static int curfail;
- static struct timeval lastfail;
int flags, pages;
flags = fr->fr_flags;
@@ -881,17 +891,17 @@
* Don't allow a nonprivileged user to use the last ten
* processes; don't let root exceed the limit.
*/
- nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1;
+ nprocs_new = atomic_fetchadd_int(&V_nprocs, 1) + 1;
if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred,
PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) {
error = EAGAIN;
- sx_xlock(&allproc_lock);
- if (ppsratecheck(&lastfail, &curfail, 1)) {
+ sx_xlock(&V_allproc_lock);
+ if (ppsratecheck(&V_lastfail, &V_curfail, 1)) {
printf("maxproc limit exceeded by uid %u (pid %d); "
"see tuning(7) and login.conf(5)\n",
td->td_ucred->cr_ruid, p1->p_pid);
}
- sx_xunlock(&allproc_lock);
+ sx_xunlock(&V_allproc_lock);
goto fail2;
}
@@ -973,8 +983,8 @@
STAILQ_INIT(&newproc->p_ktr);
/* We have to lock the process tree while we look for a pid. */
- sx_xlock(&proctree_lock);
- sx_xlock(&allproc_lock);
+ sx_xlock(&V_proctree_lock);
+ sx_xlock(&V_allproc_lock);
/*
* Increment the count of procs running with this uid. Don't allow
@@ -995,8 +1005,8 @@
}
error = EAGAIN;
- sx_xunlock(&allproc_lock);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_allproc_lock);
+ sx_xunlock(&V_proctree_lock);
#ifdef MAC
mac_proc_destroy(newproc);
#endif
@@ -1012,7 +1022,7 @@
fdclose(td, fp_procdesc, *fr->fr_pd_fd);
fdrop(fp_procdesc, td);
}
- atomic_add_int(&nprocs, -1);
+ atomic_add_int(&V_nprocs, -1);
pause("fork", hz / 2);
return (error);
}
Index: sys/kern/kern_jail.c
===================================================================
--- sys/kern/kern_jail.c
+++ sys/kern/kern_jail.c
@@ -62,6 +62,10 @@
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
+#include <sys/vps.h>
+#ifdef VIMAGE
+#include <sys/reboot.h>
+#endif
#include <net/if.h>
#include <net/vnet.h>
@@ -107,7 +111,7 @@
.pr_hostuuid = DEFAULT_HOSTUUID,
.pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children),
#ifdef VIMAGE
- .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL,
+ .pr_flags = PR_HOST|PR_VNET|PR_VPS|_PR_IP_SADDRSEL,
#else
.pr_flags = PR_HOST|_PR_IP_SADDRSEL,
#endif
@@ -171,6 +175,7 @@
{"host", 0, PR_HOST},
#ifdef VIMAGE
{"vnet", 0, PR_VNET},
+ {"vps", 0, PR_VPS},
#endif
#ifdef INET
{"ip4", PR_IP4_USER, PR_IP4_USER},
@@ -627,6 +632,11 @@
vfs_opterror(opts, "vnet cannot be changed after creation");
goto done_errmsg;
}
+ if ((flags & JAIL_UPDATE) && (ch_flags & PR_VPS)) {
+ error = EINVAL;
+ vfs_opterror(opts, "vps cannot be changed after creation");
+ goto done_errmsg;
+ }
#endif
#ifdef INET
if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) {
@@ -1801,6 +1811,39 @@
goto done_errmsg;
}
+#ifdef VIMAGE
+ /* Allocate a new vps if specified. */
+ if (pr_flags & PR_VPS) {
+ vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY);
+ if ((error = change_dir(pr->pr_root, td)) != 0)
+ goto c_unlock;
+#ifdef MAC
+ if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root)))
+ goto c_unlock;
+#endif
+c_unlock:
+ VOP_UNLOCK(pr->pr_root, 0);
+ if (error || (error = pwd_chroot(td, pr->pr_root))) {
+ vfs_opterror(opts, "vps chroot failed");
+ if (!created)
+ prison_deref(pr, PD_DEREF);
+ goto done_errmsg;
+ }
+
+ /* We temporarily need a ref as otherwise prhold will panic. */
+ mtx_lock(&pr->pr_mtx);
+ pr->pr_ref++;
+ pr->pr_uref++;
+ mtx_unlock(&pr->pr_mtx);
+ pr->pr_vps = vps_alloc(pr);
+ mtx_lock(&pr->pr_mtx);
+ pr->pr_ref--;
+ pr->pr_uref--;
+ mtx_unlock(&pr->pr_mtx);
+ } else {
+ pr->pr_vps = ppr->pr_vps;
+ }
+#endif
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
mtx_lock(&pr->pr_mtx);
@@ -2285,7 +2328,28 @@
/*
* Kill all processes unfortunate enough to be attached to this prison.
*/
- sx_slock(&allproc_lock);
+#ifdef VIMAGE
+ if (pr->pr_vps) {
+ /*
+ * Send a signal to init and let init do its job.
+ * This should run rc.shutdown and processes should go away.
+ * All but init? We need to catch the tail-end of reboot(2)
+ * and handle it appropriately for the non-default vps instances.
+ * vps_destroy() will ensure init and swapper will also go
+ * away and might sleep. If they do not go away, something will
+ * hold refs on cred and prisons.
+ * XXX There are other places which might do that for a long
+ * time as well.
+ */
+ CURVPS_SET(pr->pr_vps);
+ shutdown_nice(RB_HALT|RB_POWEROFF);
+ vps_destroy(pr->pr_vps);
+ CURVPS_RESTORE();
+ } else
+#endif
+ {
+ /* Operate on current vps instance only. */
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state != PRS_NEW && p->p_ucred &&
@@ -2293,7 +2357,8 @@
kern_psignal(p, SIGKILL);
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
+ }
/* Remove the temporary reference added by jail_remove. */
prison_deref(pr, deuref | PD_DEREF);
}
@@ -2348,6 +2413,24 @@
struct ucred *newcred, *oldcred;
int error;
+#ifdef VIMAGE
+ /*
+ * Do not allow migrating a process between virtual process spaces.
+ * Use the console to attach to it. Getting all the process space
+ * things right, including a new pid, process group, session, terminal,
+ * tracing is one thing (with a lot of work) and may break apps if the
+ * pid changes, the pgrp no longer has the same (p)id; getting things
+ * restored to original state and properly re-parented is virtually
+ * impossible. So do what we do on a normal machine, present a terminal
+ * to login to.
+ */
+ if (pr->pr_flags & PR_VPS) {
+ mtx_unlock(&pr->pr_mtx);
+ sx_sunlock(&allprison_lock);
+ return (EPERM);
+ }
+#endif
+
/*
* XXX: Note that there is a slight race here if two threads
* in the same privileged process attempt to attach to two
@@ -2628,6 +2711,9 @@
#ifdef VIMAGE
if (pr->pr_vnet != ppr->pr_vnet)
vnet_destroy(pr->pr_vnet);
+ KASSERT((pr->pr_vps == ppr->pr_vps || pr->pr_vps == NULL),
+ ("%s: pr %p pr_vps %p != NULL\n",
+ __func__, pr, pr->pr_vps));
#endif
if (pr->pr_root != NULL)
vrele(pr->pr_root);
@@ -2912,9 +2998,9 @@
#ifdef VIMAGE
/*
* Determine whether the prison represented by cred owns
- * its vnet rather than having it inherited.
+ * its vnet/vps rather than having it inherited.
*
- * Returns 1 in case the prison owns the vnet, 0 otherwise.
+ * Returns 1 in case the prison owns the vnet/vps, 0 otherwise.
*/
int
prison_owns_vnet(struct ucred *cred)
@@ -2926,6 +3012,17 @@
*/
return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0);
}
+
+int
+prison_owns_vps(struct ucred *cred)
+{
+
+ /*
+ * vps cannot be added/removed after jail creation,
+ * so no need to lock here.
+ */
+ return (cred->cr_prison->pr_flags & PR_VPS ? 1 : 0);
+}
#endif
/*
@@ -3542,6 +3639,26 @@
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
sysctl_jail_vnet, "I", "Jail owns vnet?");
+static int
+sysctl_jail_vps(SYSCTL_HANDLER_ARGS)
+{
+ int error, havevps;
+#ifdef VIMAGE
+ struct ucred *cred = req->td->td_ucred;
+
+ havevps = jailed(cred) && prison_owns_vps(cred);
+#else
+ havevps = 0;
+#endif
+ error = SYSCTL_OUT(req, &havevps, sizeof(havevps));
+
+ return (error);
+}
+
+SYSCTL_PROC(_security_jail, OID_AUTO, vps,
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ sysctl_jail_vps, "I", "Jail owns vps?");
+
#if defined(INET) || defined(INET6)
SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW,
&jail_max_af_ips, 0,
@@ -3697,6 +3814,8 @@
#ifdef VIMAGE
SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN,
"E,jailsys", "Virtual network stack");
+SYSCTL_JAIL_PARAM(, vps, CTLTYPE_INT | CTLFLAG_RDTUN,
+ "E,jailsys", "Virtual process space");
#endif
SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD,
"B", "Jail is in the process of shutting down");
@@ -4023,12 +4142,12 @@
ASSERT_RACCT_ENABLED();
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
sx_xlock(&allprison_lock);
if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) {
sx_xunlock(&allprison_lock);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return;
}
@@ -4046,6 +4165,7 @@
/*
* Force rctl to reattach rules to processes.
*/
+ /* XXX do we need to do this over all vps instances as well? */
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
cred = crhold(p->p_ucred);
@@ -4055,7 +4175,7 @@
}
#endif
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
prison_racct_free_locked(oldprr);
sx_xunlock(&allprison_lock);
}
@@ -4103,6 +4223,7 @@
? pr->pr_cpuset->cs_id : -1);
#ifdef VIMAGE
db_printf(" vnet = %p\n", pr->pr_vnet);
+ db_printf(" vps = %p\n", pr->pr_vps);
#endif
db_printf(" root = %p\n", pr->pr_root);
db_printf(" securelevel = %d\n", pr->pr_securelevel);
Index: sys/kern/kern_kthread.c
===================================================================
--- sys/kern/kern_kthread.c
+++ sys/kern/kern_kthread.c
@@ -32,6 +32,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
+#include <sys/jail.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -45,6 +46,8 @@
#include <sys/wait.h>
#include <sys/sched.h>
#include <sys/tslog.h>
+#include <sys/vps.h>
+
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -164,11 +167,30 @@
* Reparent curthread from proc0 to init so that the zombie
* is harvested.
*/
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
PROC_LOCK(p);
- proc_reparent(p, initproc);
+#ifdef VIMAGE
+ /*
+ * In the VIMAGE case if the kproc is our virtual "swapper"
+ * do not reparent it to our init as otherwise it would create
+ * a circle and never go away. Let the parent vps reap it
+ * as it was setup. And it needs to be the init there and
+ * not the swapper(kernel).
+ */
+ if (!IS_DEFAULT_VPS(TD_TO_VPS(FIRST_THREAD_IN_PROC(p))) &&
+ p->p_pid == 0) {
+ struct proc *init0;
+
+ CURVPS_SET_QUIET(vps0);
+ init0 = V_initproc;
+ CURVPS_RESTORE();
+
+ proc_reparent(p, init0);
+ } else
+#endif
+ proc_reparent(p, V_initproc);
PROC_UNLOCK(p);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
/*
* Wakeup anyone waiting for us to exit.
@@ -271,7 +293,7 @@
/* If no process supplied, put it on proc0 */
if (p == NULL)
- p = &proc0;
+ p = V_vproc0;
/* Initialize our new td */
newtd = thread_alloc(pages);
@@ -294,6 +316,9 @@
TSTHREAD(newtd, newtd->td_name);
newtd->td_proc = p; /* needed for cpu_copy_thread */
+#ifdef VIMAGE
+ newtd->td_vps = TD_TO_VPS(oldtd);
+#endif
/* might be further optimized for kthread */
cpu_copy_thread(newtd, oldtd);
/* put the designated function(arg) as the resume context */
Index: sys/kern/kern_ktrace.c
===================================================================
--- sys/kern/kern_ktrace.c
+++ sys/kern/kern_ktrace.c
@@ -952,25 +952,33 @@
* Clear all uses of the tracefile.
*/
if (ops == KTROP_CLEARFILE) {
+ VPS_ITERATOR_DECL(vps_iter);
int vrele_count;
vrele_count = 0;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_tracevp == vp) {
- if (ktrcanset(td, p)) {
- mtx_lock(&ktrace_mtx);
- ktr_freeproc(p, &cred, NULL);
- mtx_unlock(&ktrace_mtx);
- vrele_count++;
- crfree(cred);
- } else
- error = EPERM;
+
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_tracevp == vp) {
+ if (ktrcanset(td, p)) {
+ mtx_lock(&ktrace_mtx);
+ ktr_freeproc(p, &cred, NULL);
+ mtx_unlock(&ktrace_mtx);
+ vrele_count++;
+ crfree(cred);
+ } else
+ error = EPERM;
+ }
+ PROC_UNLOCK(p);
}
- PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
if (vrele_count > 0) {
while (vrele_count-- > 0)
vrele(vp);
@@ -980,14 +988,14 @@
/*
* do it
*/
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (uap->pid < 0) {
/*
* by process group
*/
pg = pgfind(-uap->pid);
if (pg == NULL) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
error = ESRCH;
goto done;
}
@@ -1011,7 +1019,7 @@
ret |= ktrops(td, p, ops, facs, vp);
}
if (nfound == 0) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
error = ESRCH;
goto done;
}
@@ -1027,7 +1035,7 @@
if (error) {
if (p != NULL)
PROC_UNLOCK(p);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
goto done;
}
if (descend)
@@ -1035,7 +1043,7 @@
else
ret |= ktrops(td, p, ops, facs, vp);
}
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
if (!ret)
error = EPERM;
done:
@@ -1143,7 +1151,7 @@
p = top;
PROC_LOCK_ASSERT(p, MA_OWNED);
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
for (;;) {
ret |= ktrops(td, p, ops, facs, vp);
/*
@@ -1170,6 +1178,7 @@
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct ktr_header *kth;
struct vnode *vp;
struct proc *p;
@@ -1270,22 +1279,28 @@
* credentials for the operation.
*/
cred = NULL;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_tracevp == vp) {
- mtx_lock(&ktrace_mtx);
- ktr_freeproc(p, &cred, NULL);
- mtx_unlock(&ktrace_mtx);
- vrele_count++;
- }
- PROC_UNLOCK(p);
- if (cred != NULL) {
- crfree(cred);
- cred = NULL;
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_tracevp == vp) {
+ mtx_lock(&ktrace_mtx);
+ ktr_freeproc(p, &cred, NULL);
+ mtx_unlock(&ktrace_mtx);
+ vrele_count++;
+ }
+ PROC_UNLOCK(p);
+ if (cred != NULL) {
+ crfree(cred);
+ cred = NULL;
+ }
}
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
while (vrele_count-- > 0)
vrele(vp);
Index: sys/kern/kern_mib.c
===================================================================
--- sys/kern/kern_mib.c
+++ sys/kern/kern_mib.c
@@ -556,8 +556,8 @@
error = sysctl_handle_int(oidp, &pm, 0, req);
if (error || !req->newptr)
return (error);
- sx_xlock(&proctree_lock);
- sx_xlock(&allproc_lock);
+ sx_xlock(&V_proctree_lock);
+ sx_xlock(&V_allproc_lock);
/*
* Only permit the values less then PID_MAX.
@@ -567,8 +567,8 @@
error = EINVAL;
else
pid_max = pm;
- sx_xunlock(&allproc_lock);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_allproc_lock);
+ sx_xunlock(&V_proctree_lock);
return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, pid_max, CTLTYPE_INT |
Index: sys/kern/kern_proc.c
===================================================================
--- sys/kern/kern_proc.c
+++ sys/kern/kern_proc.c
@@ -126,15 +126,21 @@
/*
* Other process lists
*/
-struct pidhashhead *pidhashtbl;
-u_long pidhash;
-struct pgrphashhead *pgrphashtbl;
-u_long pgrphash;
-struct proclist allproc;
-struct proclist zombproc;
+VPS_DEFINE(struct pidhashhead *, pidhashtbl);
+VPS_DEFINE(u_long, pidhash);
+VPS_DEFINE(struct pgrphashhead *, pgrphashtbl);
+VPS_DEFINE(u_long, pgrphash);
+VPS_DEFINE(struct proclist, allproc);
+VPS_DEFINE(struct proclist, zombproc);
+#ifndef VIMAGE
struct sx __exclusive_cache_line allproc_lock;
struct sx __exclusive_cache_line proctree_lock;
struct mtx __exclusive_cache_line ppeers_lock;
+#else
+VPS_DEFINE(struct sx, allproc_lock);
+VPS_DEFINE(struct sx, proctree_lock);
+VPS_DEFINE(struct mtx, ppeers_lock);
+#endif
uma_zone_t proc_zone;
/*
@@ -179,22 +185,46 @@
/*
* Initialize global process hashing structures.
*/
-void
+static void
procinit(void)
{
- sx_init(&allproc_lock, "allproc");
- sx_init(&proctree_lock, "proctree");
- mtx_init(&ppeers_lock, "p_peers", NULL, MTX_DEF);
- LIST_INIT(&allproc);
- LIST_INIT(&zombproc);
- pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
- pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
- proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
- proc_ctor, proc_dtor, proc_init, proc_fini,
- UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uihashinit();
+ sx_init(&V_allproc_lock, "allproc");
+ sx_init(&V_proctree_lock, "proctree");
+ mtx_init(&V_ppeers_lock, "p_peers", NULL, MTX_DEF);
+ LIST_INIT(&V_allproc);
+ LIST_INIT(&V_zombproc);
+ V_pidhashtbl = hashinit(maxproc / 4, M_PROC, &V_pidhash);
+ V_pgrphashtbl = hashinit(maxproc / 4, M_PROC, &V_pgrphash);
+ if (IS_DEFAULT_VPS(curvps)) {
+ proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
+ proc_ctor, proc_dtor, proc_init, proc_fini,
+ UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ }
}
+VPS_SYSINIT(procinit, SI_SUB_INTRINSIC, SI_ORDER_SECOND, procinit, NULL);
+
+#ifdef VIMAGE
+static void
+procdestroy(void *ident __unused)
+{
+
+ KASSERT((LIST_EMPTY(&V_allproc)), ("%s: list allproc %p not empty\n",
+ __func__, &V_allproc));
+ KASSERT((LIST_EMPTY(&V_zombproc)), ("%s: list zombproc %p not empty\n",
+ __func__, &V_zombproc));
+
+ /* proc_zone */
+ hashdestroy(V_pgrphashtbl, M_PROC, V_pgrphash);
+ hashdestroy(V_pidhashtbl, M_PROC, V_pidhash);
+
+ mtx_destroy(&V_ppeers_lock);
+ sx_destroy(&V_proctree_lock);
+ sx_destroy(&V_allproc_lock);
+}
+VPS_SYSUNINIT(procdestroy, SI_SUB_INTRINSIC, SI_ORDER_SECOND, procdestroy,
+ NULL);
+#endif
/*
* Prepare a proc for use.
@@ -303,7 +333,7 @@
inferior(struct proc *p)
{
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
for (; p != curproc; p = proc_realparent(p)) {
if (p->p_pid == 0)
@@ -317,7 +347,7 @@
{
struct proc *p;
- sx_assert(&allproc_lock, SX_LOCKED);
+ sx_assert(&V_allproc_lock, SX_LOCKED);
LIST_FOREACH(p, PIDHASH(pid), p_hash) {
if (p->p_pid == pid) {
PROC_LOCK(p);
@@ -347,9 +377,9 @@
PROC_LOCK(p);
return (p);
}
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
p = pfind_locked(pid);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (p);
}
@@ -361,11 +391,11 @@
{
struct proc *p;
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
p = pfind_locked(pid);
if (p == NULL)
p = zpfind_locked(pid);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (p);
}
@@ -376,7 +406,8 @@
struct proc *p;
struct thread *td;
- sx_assert(&allproc_lock, SX_LOCKED);
+ /* Operate on current vps instance only. */
+ sx_assert(&V_allproc_lock, SX_LOCKED);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW) {
@@ -402,7 +433,7 @@
{
struct pgrp *pgrp;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
if (pgrp->pg_id == pgid) {
@@ -426,7 +457,7 @@
if (p->p_pid == pid) {
PROC_LOCK(p);
} else {
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
if (pid <= PID_MAX) {
p = pfind_locked(pid);
if (p == NULL && (flags & PGET_NOTWEXIT) == 0)
@@ -436,7 +467,7 @@
} else {
p = NULL;
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
if (p == NULL)
return (ESRCH);
if ((flags & PGET_CANSEE) != 0) {
@@ -486,7 +517,7 @@
enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess)
{
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
KASSERT(pgrp != NULL, ("enterpgrp: pgrp == NULL"));
KASSERT(p->p_pid == pgid,
@@ -547,7 +578,7 @@
enterthispgrp(struct proc *p, struct pgrp *pgrp)
{
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
@@ -573,7 +604,7 @@
{
struct pgrp *savepgrp;
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
PGRP_LOCK_ASSERT(p->p_pgrp, MA_NOTOWNED);
@@ -610,7 +641,7 @@
{
struct pgrp *savepgrp;
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
savepgrp = p->p_pgrp;
PGRP_LOCK(savepgrp);
PROC_LOCK(p);
@@ -632,7 +663,7 @@
struct session *savesess;
struct tty *tp;
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
@@ -691,7 +722,7 @@
struct session *mysession;
struct proc *q;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
PGRP_LOCK_ASSERT(pgrp, MA_NOTOWNED);
SESS_LOCK_ASSERT(pgrp->pg_session, MA_NOTOWNED);
@@ -744,7 +775,7 @@
}
PROC_UNLOCK(p);
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
if (SESS_LEADER(p)) {
sp = p->p_session;
@@ -781,17 +812,17 @@
}
if (ttyvp != NULL) {
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
if (vn_lock(ttyvp, LK_EXCLUSIVE) == 0) {
VOP_REVOKE(ttyvp, REVOKEALL);
VOP_UNLOCK(ttyvp, 0);
}
vrele(ttyvp);
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
}
}
fixjobc(p, p->p_pgrp, 0);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
}
/*
@@ -851,10 +882,10 @@
struct proc *p;
int i;
- for (i = 0; i <= pgrphash; i++) {
- if (!LIST_EMPTY(&pgrphashtbl[i])) {
+ for (i = 0; i <= V_pgrphash; i++) {
+ if (!LIST_EMPTY(&V_pgrphashtbl[i])) {
printf("\tindx %d\n", i);
- LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
+ LIST_FOREACH(pgrp, &V_pgrphashtbl[i], pg_hash) {
printf(
"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
(void *)pgrp, (long)pgrp->pg_id,
@@ -910,7 +941,7 @@
struct timeval boottime;
/* For proc_realparent. */
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
bzero(kp, sizeof(*kp));
@@ -1019,7 +1050,7 @@
kp->ki_kiflag |= KI_CTTY;
if (SESS_LEADER(p))
kp->ki_kiflag |= KI_SLEADER;
- /* XXX proctree_lock */
+ /* XXX V_proctree_lock */
tp = sp->s_ttyp;
SESS_UNLOCK(sp);
}
@@ -1209,8 +1240,9 @@
{
struct proc *p;
- sx_assert(&allproc_lock, SX_LOCKED);
- LIST_FOREACH(p, &zombproc, p_list) {
+ /* Operate on current vps instance only. */
+ sx_assert(&V_allproc_lock, SX_LOCKED);
+ LIST_FOREACH(p, &V_zombproc, p_list) {
if (p->p_pid == pid) {
PROC_LOCK(p);
break;
@@ -1227,9 +1259,9 @@
{
struct proc *p;
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
p = zpfind_locked(pid);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (p);
}
@@ -1465,11 +1497,11 @@
error = sysctl_wire_old_buffer(req, 0);
if (error)
return (error);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
error = pget((pid_t)name[0], PGET_CANSEE, &p);
if (error == 0)
error = sysctl_out_proc(p, req, flags);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (error);
}
@@ -1502,14 +1534,15 @@
* traced process. Only grab it if we are producing any
* data to begin with.
*/
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
}
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) {
+ /* Operate on current vps instance only. */
if (!doingzomb)
- p = LIST_FIRST(&allproc);
+ p = LIST_FIRST(&V_allproc);
else
- p = LIST_FIRST(&zombproc);
+ p = LIST_FIRST(&V_zombproc);
for (; p != NULL; p = LIST_NEXT(p, p_list)) {
/*
* Skip embryonic processes.
@@ -1569,7 +1602,7 @@
PROC_UNLOCK(p);
continue;
}
- /* XXX proctree_lock */
+ /* XXX V_proctree_lock */
SESS_LOCK(p->p_session);
if (p->p_session->s_ttyp == NULL ||
tty_udev(p->p_session->s_ttyp) !=
@@ -1609,9 +1642,9 @@
}
}
out:
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
if (req->oldptr != NULL)
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (error);
}
@@ -3095,101 +3128,133 @@
void
stop_all_proc(void)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *cp, *p;
int r, gen;
bool restart, seen_stopped, seen_exiting, stopped_some;
- cp = curproc;
+ KASSERT(IS_DEFAULT_VPS(curvps),
+ ("%s: called from non vps0 %p: vps %p\n", __func__, vps0, curvps));
+
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ cp = NULL;
+ else
+#endif
+ cp = curproc;
allproc_loop:
- sx_xlock(&allproc_lock);
- gen = allproc_gen;
- seen_exiting = seen_stopped = stopped_some = restart = false;
- LIST_REMOVE(cp, p_list);
- LIST_INSERT_HEAD(&allproc, cp, p_list);
- for (;;) {
- p = LIST_NEXT(cp, p_list);
- if (p == NULL)
- break;
+ sx_xlock(&V_allproc_lock);
+ if (cp == NULL)
+ cp = LIST_FIRST(&V_allproc);
+ gen = allproc_gen;
+ seen_exiting = seen_stopped = stopped_some = restart = false;
LIST_REMOVE(cp, p_list);
- LIST_INSERT_AFTER(p, cp, p_list);
- PROC_LOCK(p);
- if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
- PROC_UNLOCK(p);
- continue;
- }
- if ((p->p_flag & P_WEXIT) != 0) {
- seen_exiting = true;
- PROC_UNLOCK(p);
- continue;
- }
- if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
- /*
- * Stopped processes are tolerated when there
- * are no other processes which might continue
- * them. P_STOPPED_SINGLE but not
- * P_TOTAL_STOP process still has at least one
- * thread running.
- */
- seen_stopped = true;
+ LIST_INSERT_HEAD(&V_allproc, cp, p_list);
+ for (;;) {
+ p = LIST_NEXT(cp, p_list);
+ if (p == NULL)
+ break;
+ LIST_REMOVE(cp, p_list);
+ LIST_INSERT_AFTER(p, cp, p_list);
+ PROC_LOCK(p);
+ if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ if ((p->p_flag & P_WEXIT) != 0) {
+ seen_exiting = true;
+ PROC_UNLOCK(p);
+ continue;
+ }
+ if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
+ /*
+ * Stopped processes are tolerated when there
+ * are no other processes which might continue
+ * them. P_STOPPED_SINGLE but not
+ * P_TOTAL_STOP process still has at least one
+ * thread running.
+ */
+ seen_stopped = true;
+ PROC_UNLOCK(p);
+ continue;
+ }
+ _PHOLD(p);
+ sx_xunlock(&V_allproc_lock);
+ r = thread_single(p, SINGLE_ALLPROC);
+ if (r != 0)
+ restart = true;
+ else
+ stopped_some = true;
+ _PRELE(p);
PROC_UNLOCK(p);
- continue;
+ sx_xlock(&V_allproc_lock);
}
- _PHOLD(p);
- sx_xunlock(&allproc_lock);
- r = thread_single(p, SINGLE_ALLPROC);
- if (r != 0)
+ /* Catch forked children we did not see in iteration. */
+ if (gen != allproc_gen)
restart = true;
- else
- stopped_some = true;
- _PRELE(p);
- PROC_UNLOCK(p);
- sx_xlock(&allproc_lock);
- }
- /* Catch forked children we did not see in iteration. */
- if (gen != allproc_gen)
- restart = true;
- sx_xunlock(&allproc_lock);
- if (restart || stopped_some || seen_exiting || seen_stopped) {
- kern_yield(PRI_USER);
- goto allproc_loop;
+ sx_xunlock(&V_allproc_lock);
+ if (restart || stopped_some || seen_exiting || seen_stopped) {
+ kern_yield(PRI_USER);
+ goto allproc_loop;
+ }
+ CURVPS_RESTORE();
}
+ VPS_LIST_RUNLOCK();
}
void
resume_all_proc(void)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *cp, *p;
- cp = curproc;
- sx_xlock(&allproc_lock);
+ KASSERT(IS_DEFAULT_VPS(curvps),
+ ("%s: called from non vps0 %p: vps %p\n", __func__, vps0, curvps));
+
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ cp = NULL;
+ else
+#endif
+ cp = curproc;
+ sx_xlock(&V_allproc_lock);
again:
- LIST_REMOVE(cp, p_list);
- LIST_INSERT_HEAD(&allproc, cp, p_list);
- for (;;) {
- p = LIST_NEXT(cp, p_list);
- if (p == NULL)
- break;
LIST_REMOVE(cp, p_list);
- LIST_INSERT_AFTER(p, cp, p_list);
- PROC_LOCK(p);
- if ((p->p_flag & P_TOTAL_STOP) != 0) {
- sx_xunlock(&allproc_lock);
- _PHOLD(p);
- thread_single_end(p, SINGLE_ALLPROC);
- _PRELE(p);
- PROC_UNLOCK(p);
- sx_xlock(&allproc_lock);
- } else {
- PROC_UNLOCK(p);
+ LIST_INSERT_HEAD(&V_allproc, cp, p_list);
+ for (;;) {
+ p = LIST_NEXT(cp, p_list);
+ if (p == NULL)
+ break;
+ LIST_REMOVE(cp, p_list);
+ LIST_INSERT_AFTER(p, cp, p_list);
+ PROC_LOCK(p);
+ if ((p->p_flag & P_TOTAL_STOP) != 0) {
+ sx_xunlock(&V_allproc_lock);
+ _PHOLD(p);
+ thread_single_end(p, SINGLE_ALLPROC);
+ _PRELE(p);
+ PROC_UNLOCK(p);
+ sx_xlock(&V_allproc_lock);
+ } else {
+ PROC_UNLOCK(p);
+ }
}
+ /* Did the loop above miss any stopped process? */
+ FOREACH_PROC_IN_SYSTEM(p) {
+ /* No need for proc lock. */
+ if ((p->p_flag & P_TOTAL_STOP) != 0)
+ goto again;
+ }
+ sx_xunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- /* Did the loop above missed any stopped process ? */
- FOREACH_PROC_IN_SYSTEM(p) {
- /* No need for proc lock. */
- if ((p->p_flag & P_TOTAL_STOP) != 0)
- goto again;
- }
- sx_xunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
}
/* #define TOTAL_STOP_DEBUG 1 */
Index: sys/kern/kern_procctl.c
===================================================================
--- sys/kern/kern_procctl.c
+++ sys/kern/kern_procctl.c
@@ -69,7 +69,7 @@
p = top;
ret = 0;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
for (;;) {
ret |= protect_setchild(td, p, flags);
PROC_UNLOCK(p);
@@ -128,7 +128,7 @@
reap_acquire(struct thread *td, struct proc *p)
{
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
if (p != curproc)
return (EPERM);
if ((p->p_treeflag & P_TREE_REAPER) != 0)
@@ -145,10 +145,10 @@
reap_release(struct thread *td, struct proc *p)
{
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
if (p != curproc)
return (EPERM);
- if (p == initproc)
+ if (p == V_initproc)
return (EINVAL);
if ((p->p_treeflag & P_TREE_REAPER) == 0)
return (EINVAL);
@@ -162,7 +162,7 @@
{
struct proc *reap, *p2, *first_p;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
bzero(rs, sizeof(*rs));
if ((p->p_treeflag & P_TREE_REAPER) == 0) {
reap = p->p_reaper;
@@ -170,7 +170,7 @@
reap = p;
rs->rs_flags |= REAPER_STATUS_OWNED;
}
- if (reap == initproc)
+ if (reap == V_initproc)
rs->rs_flags |= REAPER_STATUS_REALINIT;
rs->rs_reaper = reap->p_pid;
rs->rs_descendants = 0;
@@ -199,18 +199,18 @@
u_int i, n;
int error;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
PROC_UNLOCK(p);
reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
n = i = 0;
error = 0;
LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
n++;
- sx_unlock(&proctree_lock);
+ sx_unlock(&V_proctree_lock);
if (rp->rp_count < n)
n = rp->rp_count;
pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
if (i == n)
break;
@@ -225,10 +225,10 @@
pip->pi_flags |= REAPER_PIDINFO_REAPER;
i++;
}
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
free(pi, M_TEMP);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
PROC_LOCK(p);
return (error);
}
@@ -278,7 +278,7 @@
struct reap_kill_tracker *t;
int error;
- sx_assert(&proctree_lock, SX_LOCKED);
+ sx_assert(&V_proctree_lock, SX_LOCKED);
if (IN_CAPABILITY_MODE(td))
return (ECAPMODE);
if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
@@ -585,12 +585,12 @@
case PROC_REAP_KILL:
case PROC_TRACE_CTL:
case PROC_TRAPCAP_CTL:
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
tree_locked = true;
break;
case PROC_REAP_ACQUIRE:
case PROC_REAP_RELEASE:
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
tree_locked = true;
break;
case PROC_TRACE_STATUS:
@@ -657,6 +657,6 @@
break;
}
if (tree_locked)
- sx_unlock(&proctree_lock);
+ sx_unlock(&V_proctree_lock);
return (error);
}
Index: sys/kern/kern_prot.c
===================================================================
--- sys/kern/kern_prot.c
+++ sys/kern/kern_prot.c
@@ -52,6 +52,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/acct.h>
+#include <sys/filedesc.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
@@ -133,10 +134,10 @@
PROC_UNLOCK(p);
} else {
PROC_UNLOCK(p);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
pp = proc_realparent(p);
ppid = pp->p_pid;
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
}
return (ppid);
@@ -340,7 +341,7 @@
newpgrp = malloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
newsess = malloc(sizeof(struct session), M_SESSION, M_WAITOK | M_ZERO);
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
if (p->p_pgid == p->p_pid || (pgrp = pgfind(p->p_pid)) != NULL) {
if (pgrp != NULL)
@@ -353,7 +354,7 @@
newsess = NULL;
}
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
if (newpgrp != NULL)
free(newpgrp, M_PGRP);
@@ -399,7 +400,7 @@
newpgrp = malloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
if (uap->pid != 0 && uap->pid != curp->p_pid) {
if ((targp = pfind(uap->pid)) == NULL) {
error = ESRCH;
@@ -457,7 +458,7 @@
error = enterthispgrp(targp, pgrp);
}
done:
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
KASSERT((error == 0) || (newpgrp != NULL),
("setpgid failed and newpgrp is NULL"));
if (newpgrp != NULL)
@@ -1738,7 +1739,7 @@
}
/* Can't trace init when securelevel > 0. */
- if (p == initproc) {
+ if (p == V_initproc) {
error = securelevel_gt(td->td_ucred, 0);
if (error)
return (error);
@@ -1860,8 +1861,10 @@
crfree(struct ucred *cr)
{
- KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
- KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
+ KASSERT(cr->cr_ref > 0, ("%s: bad ucred %p refcount: %d",
+ __func__, cr, cr->cr_ref));
+ KASSERT(cr->cr_ref != 0xdeadc0de,
+ ("%s: dangling reference to ucred %p", __func__, cr));
if (refcount_release(&cr->cr_ref)) {
/*
* Some callers of crget(), such as nfs_statfs(),
Index: sys/kern/kern_racct.c
===================================================================
--- sys/kern/kern_racct.c
+++ sys/kern/kern_racct.c
@@ -1214,94 +1214,107 @@
}
static void
-racctd(void)
+_racctd(void)
{
struct thread *td;
struct proc *p;
struct timeval wallclock;
uint64_t pct, pct_estimate, runtime;
- ASSERT_RACCT_ENABLED();
-
- for (;;) {
- racct_decay();
+ sx_slock(&V_allproc_lock);
- sx_slock(&allproc_lock);
+ LIST_FOREACH(p, &V_zombproc, p_list) {
+ PROC_LOCK(p);
+ racct_set(p, RACCT_PCTCPU, 0);
+ PROC_UNLOCK(p);
+ }
- LIST_FOREACH(p, &zombproc, p_list) {
- PROC_LOCK(p);
- racct_set(p, RACCT_PCTCPU, 0);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NORMAL) {
PROC_UNLOCK(p);
+ continue;
}
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state != PRS_NORMAL) {
- PROC_UNLOCK(p);
- continue;
- }
-
- microuptime(&wallclock);
- timevalsub(&wallclock, &p->p_stats->p_start);
- PROC_STATLOCK(p);
- FOREACH_THREAD_IN_PROC(p, td)
- ruxagg(p, td);
- runtime = cputick2usec(p->p_rux.rux_runtime);
- PROC_STATUNLOCK(p);
+ microuptime(&wallclock);
+ timevalsub(&wallclock, &p->p_stats->p_start);
+ PROC_STATLOCK(p);
+ FOREACH_THREAD_IN_PROC(p, td)
+ ruxagg(p, td);
+ runtime = cputick2usec(p->p_rux.rux_runtime);
+ PROC_STATUNLOCK(p);
#ifdef notyet
- KASSERT(runtime >= p->p_prev_runtime,
- ("runtime < p_prev_runtime"));
+ KASSERT(runtime >= p->p_prev_runtime,
+ ("runtime < p_prev_runtime"));
#else
- if (runtime < p->p_prev_runtime)
- runtime = p->p_prev_runtime;
+ if (runtime < p->p_prev_runtime)
+ runtime = p->p_prev_runtime;
#endif
- p->p_prev_runtime = runtime;
- if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
- pct_estimate = (1000000 * runtime * 100) /
- ((uint64_t)wallclock.tv_sec * 1000000 +
- wallclock.tv_usec);
- } else
- pct_estimate = 0;
- pct = racct_getpcpu(p, pct_estimate);
- RACCT_LOCK();
+ p->p_prev_runtime = runtime;
+ if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) {
+ pct_estimate = (1000000 * runtime * 100) /
+ ((uint64_t)wallclock.tv_sec * 1000000 +
+ wallclock.tv_usec);
+ } else
+ pct_estimate = 0;
+ pct = racct_getpcpu(p, pct_estimate);
+ RACCT_LOCK();
#ifdef RCTL
- rctl_throttle_decay(p->p_racct, RACCT_READBPS);
- rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS);
- rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
- rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
+ rctl_throttle_decay(p->p_racct, RACCT_READBPS);
+ rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS);
+ rctl_throttle_decay(p->p_racct, RACCT_READIOPS);
+ rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS);
#endif
- racct_set_locked(p, RACCT_PCTCPU, pct, 1);
- racct_set_locked(p, RACCT_CPU, runtime, 0);
- racct_set_locked(p, RACCT_WALLCLOCK,
- (uint64_t)wallclock.tv_sec * 1000000 +
- wallclock.tv_usec, 0);
- RACCT_UNLOCK();
+ racct_set_locked(p, RACCT_PCTCPU, pct, 1);
+ racct_set_locked(p, RACCT_CPU, runtime, 0);
+ racct_set_locked(p, RACCT_WALLCLOCK,
+ (uint64_t)wallclock.tv_sec * 1000000 +
+ wallclock.tv_usec, 0);
+ RACCT_UNLOCK();
+ PROC_UNLOCK(p);
+ }
+
+ /*
+ * To ensure that processes are throttled in a fair way, we need
+ * to iterate over all processes again and check the limits
+ * for %cpu resource only after ucred racct containers have been
+ * properly filled.
+ */
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NORMAL) {
PROC_UNLOCK(p);
+ continue;
}
- /*
- * To ensure that processes are throttled in a fair way, we need
- * to iterate over all processes again and check the limits
- * for %cpu resource only after ucred racct containers have been
- * properly filled.
- */
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- if (p->p_state != PRS_NORMAL) {
- PROC_UNLOCK(p);
- continue;
- }
+ if (racct_pcpu_available(p) <= 0) {
+ if (p->p_racct->r_resources[RACCT_PCTCPU] >
+ pcpu_threshold)
+ racct_proc_throttle(p, -1);
+ } else if (p->p_throttled == -1) {
+ racct_proc_wakeup(p);
+ }
+ PROC_UNLOCK(p);
+ }
+ sx_sunlock(&V_allproc_lock);
+}
- if (racct_pcpu_available(p) <= 0) {
- if (p->p_racct->r_resources[RACCT_PCTCPU] >
- pcpu_threshold)
- racct_proc_throttle(p, -1);
- } else if (p->p_throttled == -1) {
- racct_proc_wakeup(p);
- }
- PROC_UNLOCK(p);
+static void
+racctd(void)
+{
+ VPS_ITERATOR_DECL(vps_iter);
+
+ ASSERT_RACCT_ENABLED();
+
+ for (;;) {
+ racct_decay();
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ _racctd();
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
pause("-", hz);
}
}
Index: sys/kern/kern_rctl.c
===================================================================
--- sys/kern/kern_rctl.c
+++ sys/kern/kern_rctl.c
@@ -1175,7 +1175,7 @@
error = str2id(subject_idstr, &id);
if (error != 0)
goto out;
- sx_assert(&allproc_lock, SA_LOCKED);
+ sx_assert(&V_allproc_lock, SA_LOCKED);
rule->rr_subject.rs_proc = pfind(id);
if (rule->rr_subject.rs_proc == NULL) {
error = ESRCH;
@@ -1266,6 +1266,7 @@
int
rctl_rule_add(struct rctl_rule *rule)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
struct ucred *cred;
struct uidinfo *uip;
@@ -1357,37 +1358,51 @@
* Now go through all the processes and add the new rule to the ones
* it applies to.
*/
- sx_assert(&allproc_lock, SA_LOCKED);
- FOREACH_PROC_IN_SYSTEM(p) {
- cred = p->p_ucred;
- switch (rule->rr_subject_type) {
- case RCTL_SUBJECT_TYPE_USER:
- if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
- cred->cr_ruidinfo == rule->rr_subject.rs_uip)
- break;
- continue;
- case RCTL_SUBJECT_TYPE_LOGINCLASS:
- if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
- break;
- continue;
- case RCTL_SUBJECT_TYPE_JAIL:
- match = 0;
- for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
- if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
- match = 1;
+ sx_assert(&V_allproc_lock, SA_LOCKED);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ sx_slock(&V_allproc_lock);
+#endif
+ FOREACH_PROC_IN_SYSTEM(p) {
+ cred = p->p_ucred;
+ switch (rule->rr_subject_type) {
+ case RCTL_SUBJECT_TYPE_USER:
+ if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
+ cred->cr_ruidinfo == rule->rr_subject.rs_uip)
+ break;
+ continue;
+ case RCTL_SUBJECT_TYPE_LOGINCLASS:
+ if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
break;
+ continue;
+ case RCTL_SUBJECT_TYPE_JAIL:
+ match = 0;
+ for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
+ if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
+ match = 1;
+ break;
+ }
}
+ if (match)
+ break;
+ continue;
+ default:
+ panic("rctl_rule_add: unknown subject type %d",
+ rule->rr_subject_type);
}
- if (match)
- break;
- continue;
- default:
- panic("rctl_rule_add: unknown subject type %d",
- rule->rr_subject_type);
- }
- rctl_racct_add_rule(p->p_racct, rule);
+ rctl_racct_add_rule(p->p_racct, rule);
+ }
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ sx_sunlock(&V_allproc_lock);
+#endif
+ CURVPS_RESTORE();
}
+ VPS_LIST_RUNLOCK();
return (0);
}
@@ -1426,6 +1441,7 @@
int
rctl_rule_remove(struct rctl_rule *filter)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
int found = 0;
@@ -1452,11 +1468,25 @@
rctl_rule_pre_callback, rctl_rule_post_callback,
filter, (void *)&found);
- sx_assert(&allproc_lock, SA_LOCKED);
+ sx_assert(&V_allproc_lock, SA_LOCKED);
RACCT_LOCK();
- FOREACH_PROC_IN_SYSTEM(p) {
- found += rctl_racct_remove_rules(p->p_racct, filter);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ sx_slock(&V_allproc_lock);
+#endif
+ FOREACH_PROC_IN_SYSTEM(p) {
+ found += rctl_racct_remove_rules(p->p_racct, filter);
+ }
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ sx_sunlock(&V_allproc_lock);
+#endif
+ CURVPS_RESTORE();
}
+ VPS_LIST_RUNLOCK();
RACCT_UNLOCK();
if (found)
@@ -1623,11 +1653,11 @@
if (error != 0)
return (error);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
error = rctl_string_to_rule(inputstr, &filter);
free(inputstr, M_RCTL);
if (error != 0) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
@@ -1669,7 +1699,7 @@
}
out:
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
if (error != 0)
return (error);
@@ -1699,6 +1729,7 @@
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct sbuf *sb;
struct rctl_rule *filter;
struct rctl_rule_link *link;
@@ -1718,41 +1749,56 @@
if (error != 0)
return (error);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
error = rctl_string_to_rule(inputstr, &filter);
free(inputstr, M_RCTL);
if (error != 0) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
bufsize = uap->outbuflen;
if (bufsize > rctl_maxbufsize) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (E2BIG);
}
buf = malloc(bufsize, M_RCTL, M_WAITOK);
sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
KASSERT(sb != NULL, ("sbuf_new failed"));
-
- FOREACH_PROC_IN_SYSTEM(p) {
- RACCT_LOCK();
- LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
- /*
- * Non-process rules will be added to the buffer later.
- * Adding them here would result in duplicated output.
- */
- if (link->rrl_rule->rr_subject_type !=
- RCTL_SUBJECT_TYPE_PROCESS)
- continue;
- if (!rctl_rule_matches(link->rrl_rule, filter))
- continue;
- rctl_rule_to_sbuf(sb, link->rrl_rule);
- sbuf_printf(sb, ",");
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ sx_slock(&V_allproc_lock);
+#endif
+ FOREACH_PROC_IN_SYSTEM(p) {
+ RACCT_LOCK();
+ LIST_FOREACH(link, &p->p_racct->r_rule_links,
+ rrl_next) {
+ /*
+ * Non-process rules will be added to the
+ * buffer later. Adding them here would result
+ * in duplicated output.
+ */
+ if (link->rrl_rule->rr_subject_type !=
+ RCTL_SUBJECT_TYPE_PROCESS)
+ continue;
+ if (!rctl_rule_matches(link->rrl_rule, filter))
+ continue;
+ rctl_rule_to_sbuf(sb, link->rrl_rule);
+ sbuf_printf(sb, ",");
+ }
+ RACCT_UNLOCK();
}
- RACCT_UNLOCK();
+#ifdef VIMAGE
+ if (saved_vps != vps_iter)
+ sx_sunlock(&V_allproc_lock);
+#endif
+ CURVPS_RESTORE();
}
+ VPS_LIST_RUNLOCK();
loginclass_racct_foreach(rctl_get_rules_callback,
rctl_rule_pre_callback, rctl_rule_post_callback,
@@ -1777,7 +1823,7 @@
error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
out:
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
free(buf, M_RCTL);
return (error);
}
@@ -1803,34 +1849,34 @@
if (error != 0)
return (error);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
error = rctl_string_to_rule(inputstr, &filter);
free(inputstr, M_RCTL);
if (error != 0) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (EINVAL);
}
if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (EOPNOTSUPP);
}
if (filter->rr_subject.rs_proc == NULL) {
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (EINVAL);
}
bufsize = uap->outbuflen;
if (bufsize > rctl_maxbufsize) {
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (E2BIG);
}
@@ -1860,7 +1906,7 @@
error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
out:
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
free(buf, M_RCTL);
return (error);
}
@@ -1883,11 +1929,11 @@
if (error != 0)
return (error);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
error = rctl_string_to_rule(inputstr, &rule);
free(inputstr, M_RCTL);
if (error != 0) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
/*
@@ -1906,7 +1952,7 @@
out:
rctl_rule_release(rule);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
@@ -1928,17 +1974,17 @@
if (error != 0)
return (error);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
error = rctl_string_to_rule(inputstr, &filter);
free(inputstr, M_RCTL);
if (error != 0) {
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
error = rctl_rule_remove(filter);
rctl_rule_release(filter);
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
return (error);
}
Index: sys/kern/kern_resource.c
===================================================================
--- sys/kern/kern_resource.c
+++ sys/kern/kern_resource.c
@@ -69,10 +69,15 @@
static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures");
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
-#define UIHASH(uid) (&uihashtbl[(uid) & uihash])
+
static struct rwlock uihashtbl_lock;
-static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
-static u_long uihash; /* size of hash table - 1 */
+
+LIST_HEAD(uihashhead, uidinfo);
+static VPS_DEFINE(struct uihashhead *, uihashtbl);
+#define V_uihashtbl VPS(uihashtbl)
+static VPS_DEFINE(u_long, uihash); /* size of hash table - 1 */
+#define V_uihash VPS(uihash)
+#define UIHASH(uid) (&V_uihashtbl[(uid) & V_uihash])
static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
@@ -114,18 +119,18 @@
break;
case PRIO_PGRP:
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (uap->who == 0) {
pg = td->td_proc->p_pgrp;
PGRP_LOCK(pg);
} else {
pg = pgfind(uap->who);
if (pg == NULL) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
break;
}
}
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL &&
@@ -141,7 +146,8 @@
case PRIO_USER:
if (uap->who == 0)
uap->who = td->td_ucred->cr_uid;
- sx_slock(&allproc_lock);
+ /* Operate on current vps instance only. */
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL &&
@@ -152,7 +158,7 @@
}
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
break;
default:
@@ -199,18 +205,18 @@
break;
case PRIO_PGRP:
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (uap->who == 0) {
pg = curp->p_pgrp;
PGRP_LOCK(pg);
} else {
pg = pgfind(uap->who);
if (pg == NULL) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
break;
}
}
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL &&
@@ -226,7 +232,8 @@
case PRIO_USER:
if (uap->who == 0)
uap->who = td->td_ucred->cr_uid;
- sx_slock(&allproc_lock);
+ /* Operate on current vps instance only. */
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL &&
@@ -237,7 +244,7 @@
}
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
break;
default:
@@ -1214,13 +1221,43 @@
p->p_sysent->sv_fixlimit(rlp, which);
}
-void
+static void
uihashinit()
{
- uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
rw_init(&uihashtbl_lock, "uidinfo hash");
}
+SYSINIT(uihashinit, SI_SUB_INTRINSIC, SI_ORDER_SECOND, uihashinit, NULL);
+
+static void
+uihashinit_vps()
+{
+
+ V_uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &V_uihash);
+}
+VPS_SYSINIT(uihashinit_vps, SI_SUB_INTRINSIC, SI_ORDER_SECOND, uihashinit_vps,
+ NULL);
+
+#ifdef VIMAGE
+static void
+uihashdestroy_vps(void *ident __unused)
+{
+ struct uidinfo *uip;
+ struct uihashhead *uih;
+ int i;
+
+ i = 0;
+ for (uih = &V_uihashtbl[V_uihash]; uih >= V_uihashtbl; uih--)
+ LIST_FOREACH(uip, uih, ui_hash)
+ i++;
+ if (i == 0)
+ hashdestroy(V_uihashtbl, M_UIDINFO, V_uihash);
+ else
+ printf("%s: leaking %d uihash entries\n", __func__, i);
+}
+VPS_SYSUNINIT(uihashdestroy_vps, SI_SUB_INTRINSIC, SI_ORDER_SECOND,
+ uihashdestroy_vps, NULL);
+#endif
/*
* Look up a uidinfo struct for the parameter uid.
@@ -1368,7 +1405,7 @@
rw_rlock(&uihashtbl_lock);
if (pre != NULL)
(pre)();
- for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
+ for (uih = &V_uihashtbl[V_uihash]; uih >= V_uihashtbl; uih--) {
LIST_FOREACH(uip, uih, ui_hash) {
(callback)(uip->ui_racct, arg2, arg3);
}
@@ -1392,7 +1429,8 @@
return (0);
}
} else if (new < 0)
- printf("negative %s for uid = %d\n", name, uip->ui_uid);
+ printf("%s: curthread %p uip %p negative %s for uid = %d\n",
+ __func__, curthread, uip, name, uip->ui_uid);
return (1);
}
Index: sys/kern/kern_shutdown.c
===================================================================
--- sys/kern/kern_shutdown.c
+++ sys/kern/kern_shutdown.c
@@ -204,6 +204,10 @@
int dumping; /* system is dumping */
int rebooting; /* system is rebooting */
+#ifdef VIMAGE
+VPS_DEFINE(int, vrebooting); /* vps is rebooting */
+#define V_vrebooting VPS(vrebooting)
+#endif
static struct dumperinfo dumper; /* our selected dumper */
/* Context information for dump-debuggers. */
@@ -276,29 +280,42 @@
if (error == 0) {
if (uap->opt & RB_REROOT)
error = kern_reroot();
- else
+ else {
+#ifdef VIMAGE
+ /* XXX Can argue that we should never make it here. */
+ /* Init will want to _exit() in this case. */
+ if (!IS_DEFAULT_VPS(TD_TO_VPS(curthread))) {
+ V_vrebooting = 1;
+ return (error);
+ }
+#endif
kern_reboot(uap->opt);
+ }
}
return (error);
}
+static VPS_DEFINE(int, vhowto);
+#define V_vhowto VPS(vhowto)
+
static void
shutdown_nice_task_fn(void *arg, int pending __unused)
{
- int howto;
- howto = (uintptr_t)arg;
+ CURVPS_SET((struct vps *)arg);
/* Send a signal to init(8) and have it shutdown the world. */
- PROC_LOCK(initproc);
- if (howto & RB_POWEROFF)
- kern_psignal(initproc, SIGUSR2);
- else if (howto & RB_POWERCYCLE)
- kern_psignal(initproc, SIGWINCH);
- else if (howto & RB_HALT)
- kern_psignal(initproc, SIGUSR1);
+ PROC_LOCK(V_initproc);
+ if (V_vhowto & RB_POWEROFF)
+ kern_psignal(V_initproc, SIGUSR2);
+ else if (V_vhowto & RB_POWERCYCLE)
+ kern_psignal(V_initproc, SIGWINCH);
+ else if (V_vhowto & RB_HALT)
+ kern_psignal(V_initproc, SIGUSR1);
else
- kern_psignal(initproc, SIGINT);
- PROC_UNLOCK(initproc);
+ kern_psignal(V_initproc, SIGINT);
+ PROC_UNLOCK(V_initproc);
+ V_vhowto = 0;
+ CURVPS_RESTORE();
}
static struct task shutdown_nice_task = TASK_INITIALIZER(0,
@@ -311,10 +328,22 @@
shutdown_nice(int howto)
{
- if (initproc != NULL && !SCHEDULER_STOPPED()) {
- shutdown_nice_task.ta_context = (void *)(uintptr_t)howto;
+ if (V_initproc != NULL && !SCHEDULER_STOPPED()) {
+
+ KASSERT(V_vhowto == 0, ("%s: vps %p howto not 0: %d\n",
+ __func__, curvps, V_vhowto));
+ V_vhowto = howto;
+ shutdown_nice_task.ta_context = (void *)curvps;
taskqueue_enqueue(taskqueue_fast, &shutdown_nice_task);
} else {
+#ifdef VIMAGE
+ /* XXX Can argue that we should never make it here. */
+ /* Init will want to _exit() in this case. */
+ if (!IS_DEFAULT_VPS(TD_TO_VPS(curthread))) {
+ V_vrebooting = 1;
+ return;
+ }
+#endif
/*
* No init(8) running, or scheduler would not allow it
* to run, so simply reboot.
@@ -462,7 +491,7 @@
struct mount *mp, *devmp;
int error;
- if (curproc != initproc)
+ if (curproc != V_initproc)
return (EPERM);
/*
Index: sys/kern/kern_sig.c
===================================================================
--- sys/kern/kern_sig.c
+++ sys/kern/kern_sig.c
@@ -1669,9 +1669,9 @@
ret = ESRCH;
if (all) {
/*
- * broadcast
+ * broadcast; current vps context only.
*/
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
p == td->td_proc || p->p_state == PRS_NEW) {
@@ -1688,9 +1688,9 @@
ret = err;
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
} else {
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (pgid == 0) {
/*
* zero pgid means send to my process group.
@@ -1700,11 +1700,11 @@
} else {
pgrp = pgfind(pgid);
if (pgrp == NULL) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (ESRCH);
}
}
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
PROC_LOCK(p);
if (p->p_pid <= 1 || p->p_flag & P_SYSTEM ||
@@ -1891,9 +1891,9 @@
struct pgrp *pgrp;
if (pgid != 0) {
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
pgrp = pgfind(pgid);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
if (pgrp != NULL) {
pgsignal(pgrp, sig, 0, ksi);
PGRP_UNLOCK(pgrp);
@@ -3279,7 +3279,7 @@
/*
* Protect the access to corefilename[] by allproc_lock.
*/
-#define corefilename_lock allproc_lock
+#define corefilename_lock V_allproc_lock
static char corefilename[MAXPATHLEN] = {"%N.core"};
TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
Index: sys/kern/kern_sysctl.c
===================================================================
--- sys/kern/kern_sysctl.c
+++ sys/kern/kern_sysctl.c
@@ -60,6 +60,7 @@
#include <sys/sx.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
+#include <sys/vps.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -498,6 +499,7 @@
if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
#ifdef VIMAGE
(oidp->oid_kind & CTLFLAG_VNET) == 0 &&
+ (oidp->oid_kind & CTLFLAG_VPS) == 0 &&
#endif
(oidp->oid_kind & CTLFLAG_TUN) != 0 &&
(oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
@@ -1998,6 +2000,9 @@
else if ((oid->oid_kind & CTLFLAG_VNET) &&
prison_owns_vnet(req->td->td_ucred))
priv = PRIV_SYSCTL_WRITEJAIL;
+ else if ((oid->oid_kind & CTLFLAG_VPS) &&
+ prison_owns_vps(req->td->td_ucred))
+ priv = PRIV_SYSCTL_WRITEJAIL;
#endif
else
priv = PRIV_SYSCTL_WRITE;
@@ -2025,8 +2030,13 @@
goto out;
#endif
#ifdef VIMAGE
+ KASSERT(((oid->oid_kind & (CTLFLAG_VNET|CTLFLAG_VPS)) !=
+ (CTLFLAG_VNET|CTLFLAG_VPS)),
+ ("CTLFLAG VNET and VPS set oid %p", oid));
if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL)
arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
+ if ((oid->oid_kind & CTLFLAG_VPS) && arg1 != NULL)
+ arg1 = (void *)(curvps->vps_data_base + (uintptr_t)arg1);
#endif
error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker);
@@ -2118,6 +2128,7 @@
memlocked = 1;
sx_xlock(&sysctlmemlock);
}
+ CURVPS_SET(TD_TO_VPS(td));
CURVNET_SET(TD_TO_VNET(td));
for (;;) {
@@ -2130,6 +2141,7 @@
}
CURVNET_RESTORE();
+ CURVPS_RESTORE();
if (req.lock == REQ_WIRED && req.validlen > 0)
vsunlock(req.oldptr, req.validlen);
Index: sys/kern/kern_thr.c
===================================================================
--- sys/kern/kern_thr.c
+++ sys/kern/kern_thr.c
@@ -32,6 +32,7 @@
#include "opt_posix.h"
#include "opt_hwpmc_hooks.h"
#include <sys/param.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -56,6 +57,7 @@
#include <sys/rtprio.h>
#include <sys/umtx.h>
#include <sys/limits.h>
+#include <sys/vps.h>
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
@@ -238,6 +240,9 @@
bcopy(&td->td_startcopy, &newtd->td_startcopy,
__rangeof(struct thread, td_startcopy, td_endcopy));
newtd->td_proc = td->td_proc;
+#ifdef VIMAGE
+ newtd->td_vps = TD_TO_VPS(td);
+#endif
newtd->td_rb_list = newtd->td_rbp_list = newtd->td_rb_inact = 0;
thread_cow_get(newtd, td);
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -58,6 +58,9 @@
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
+#ifdef VIMAGE
+#include <sys/jail.h>
+#endif
#include <security/audit/audit.h>
@@ -83,7 +86,7 @@
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x470,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x518,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x528,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb0,
"struct proc KBI p_flag");
@@ -103,7 +106,7 @@
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x2e8,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x334,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x33c,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0x68,
"struct proc KBI p_flag");
@@ -451,6 +454,10 @@
PROC_LOCK_ASSERT(p, MA_OWNED);
newtd->td_ucred = crhold(p->p_ucred);
+#ifdef VIMAGE
+ /* Make sure the cached vps stays correct. */
+ newtd->td_vps = p->p_ucred->cr_prison->pr_vps;
+#endif
newtd->td_limit = lim_hold(p->p_limit);
newtd->td_cowgen = p->p_cowgen;
}
@@ -460,6 +467,10 @@
{
newtd->td_ucred = crhold(td->td_ucred);
+#ifdef VIMAGE
+ /* Make sure to inherit the cached vps as well. */
+ newtd->td_vps = td->td_vps;
+#endif
newtd->td_limit = lim_hold(td->td_limit);
newtd->td_cowgen = td->td_cowgen;
}
@@ -489,6 +500,11 @@
oldcred = td->td_ucred;
td->td_ucred = crhold(p->p_ucred);
}
+#ifdef VIMAGE
+ /* Make sure the cached vps stays correct. */
+ if (td->td_vps != p->p_ucred->cr_prison->pr_vps)
+ td->td_vps = p->p_ucred->cr_prison->pr_vps;
+#endif
if (td->td_limit != p->p_limit) {
oldlimit = td->td_limit;
td->td_limit = lim_hold(p->p_limit);
Index: sys/kern/kern_vps.c
===================================================================
--- /dev/null
+++ sys/kern/kern_vps.c
@@ -0,0 +1,835 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2004-2009 University of Zagreb
+ * Copyright (c) 2006-2009 FreeBSD Foundation
+ * Copyright (c) 2018 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by the University of Zagreb and the
+ * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
+ * FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Bjoern Zeeb
+ * under sponsorship from iXsystems, Inc.
+ *
+ * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_ddb.h"
+#include "opt_kdb.h"
+
+#include <sys/param.h>
+#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/jail.h>
+#include <sys/sdt.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/eventhandler.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
+#include <sys/vps.h>
+
+#include <machine/stdarg.h>
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_sym.h>
+#endif
+
+
+/*-
+ * This file implements core functions for virtual process spaces:
+ *
+ * - Virtual process space management functions.
+ *
+ * - Virtual process space memory allocator, which virtualizes global
+ * variables in the process space.
+ *
+ * - Virtualized SYSINIT's/SYSUNINIT's, which allow process spaces
+ * to register startup/shutdown events to be run for each virtual process
+ * space instance.
+ */
+
+static MALLOC_DEFINE(M_VPS, "vps", "process space control block");
+
+/*
+ * The virtual process space list has two read-write locks, one sleepable and
+ * the other not, so that the list can be stabilized and walked in a variety
+ * of process space contexts. Both must be acquired exclusively to modify
+ * the list, but a read lock of either lock is sufficient to walk the list.
+ */
+struct rwlock vps_rwlock;
+struct sx vps_sxlock;
+
+#define VPS_LIST_WLOCK() do { \
+ sx_xlock(&vps_sxlock); \
+ rw_wlock(&vps_rwlock); \
+} while (0)
+
+#define VPS_LIST_WUNLOCK() do { \
+ rw_wunlock(&vps_rwlock); \
+ sx_xunlock(&vps_sxlock); \
+} while (0)
+
+struct vps_list_head vps_head;
+struct vps *vps0;
+
+/*
+ * The virtual process space allocator provides storage for virtualized
+ * global variables. These variables are defined/declared using the
+ * VPS_DEFINE()/VPS_DECLARE() macros, which place them in the 'set_vps'
+ * linker set. The details of the implementation are somewhat subtle, but
+ * allow the majority of process subsystems to remain
+ * virtualization-agnostic.
+ *
+ * The virtual process space allocator handles variables in the base kernel
+ * vs. modules in similar but different ways. In both cases, virtualized
+ * global variables are marked as such by being declared to be part of the
+ * vps linker set. These "primary" copies of global variables serve two
+ * functions:
+ *
+ * (1) They contain static initialization or "default" values for global
+ * variables which will be propagated to each virtual process space
+ * instance when created. As with normal global variables, they default
+ * to zero-filled.
+ *
+ * (2) They act as unique global names by which the variable can be referred
+ * to, regardless of process space instance. The single global symbol
+ * will be used to calculate the location of a per-virtual instance
+ * variable at run-time.
+ *
+ * Each virtual process space instance has a complete copy of each
+ * virtualized global variable, stored in a malloc'd block of memory
+ * referred to by vps->vps_data_mem. Critical to the design is that each
+ * per-instance memory block is laid out identically to the primary block so
+ * that the offset of each global variable is the same across all blocks.
+ * To optimize run-time access, a precalculated 'base' address,
+ * vps->vps_data_base, is stored in each vps, and is the amount that can
+ * be added to the address of a 'primary' instance of a variable to get to the
+ * per-vps instance.
+ *
+ * Virtualized global variables in modules are handled in a similar manner,
+ * but as each module has its own 'set_vps' linker set, and we want to keep
+ * all virtualized globals together, we reserve space in the kernel's linker set
+ * for potential module variables using a per-vps character array,
+ * 'modspace'. The virtual process space allocator maintains a free list to
+ * track what space in the array is free (all, initially) and as modules are
+ * linked, allocates portions of the space to specific globals. The kernel
+ * module linker queries the virtual process space allocator and will
+ * bind references of the global to the location during linking. It also
+ * calls into the virtual process space allocator, once the memory is
+ * initialized, in order to propagate the new static initializations to all
+ * existing virtual process space instances so that the soon-to-be executing
+ * module will find every process space instance with proper default values.
+ */
+
+/*
+ * Number of bytes of data in the 'set_vps' linker set, and hence the total
+ * size of all kernel virtualized global variables, and the malloc(9) type
+ * that will be used to allocate it.
+ */
+#define VPS_BYTES (VPS_STOP - VPS_START)
+
+static MALLOC_DEFINE(M_VPS_DATA, "vps_data", "VPS data");
+
+/*
+ * VPS_MODMIN is the minimum number of bytes we will reserve for the sum of
+ * global variables across all loaded modules. As this actually sizes an
+ * array declared as a virtualized global variable in the kernel itself, and
+ * we want the virtualized global variable space to be page-sized, we may
+ * have more space than that in practice.
+ */
+#define VPS_MODMIN 8192
+#define VPS_SIZE roundup2(VPS_BYTES, PAGE_SIZE)
+
+/*
+ * Space to store virtualized global variables from loadable kernel modules,
+ * and the free list to manage it.
+ */
+static VPS_DEFINE(char, modspace[VPS_MODMIN]);
+
+/*
+ * Global lists of subsystem constructor and destructors for vpss. They are
+ * registered via VPS_SYSINIT() and VPS_SYSUNINIT(). Both lists are
+ * protected by the vps_sysinit_sxlock global lock.
+ */
+static TAILQ_HEAD(vps_sysinit_head, vps_sysinit) vps_constructors =
+ TAILQ_HEAD_INITIALIZER(vps_constructors);
+static TAILQ_HEAD(vps_sysuninit_head, vps_sysinit) vps_destructors =
+ TAILQ_HEAD_INITIALIZER(vps_destructors);
+
+struct sx vps_sysinit_sxlock;
+
+#define VPS_SYSINIT_WLOCK() sx_xlock(&vps_sysinit_sxlock);
+#define VPS_SYSINIT_WUNLOCK() sx_xunlock(&vps_sysinit_sxlock);
+#define VPS_SYSINIT_RLOCK() sx_slock(&vps_sysinit_sxlock);
+#define VPS_SYSINIT_RUNLOCK() sx_sunlock(&vps_sysinit_sxlock);
+
+/* XXX-BZ should probably be vpd_* instead of vnd_* but in the hope to
+ * harmonize most of this later on keep the names the same for now. */
+struct vps_data_free {
+ uintptr_t vnd_start;
+ int vnd_len;
+ TAILQ_ENTRY(vps_data_free) vnd_link;
+};
+
+static MALLOC_DEFINE(M_VPS_DATA_FREE, "vps_data_free",
+ "VPS resource accounting");
+static TAILQ_HEAD(, vps_data_free) vps_data_free_head =
+ TAILQ_HEAD_INITIALIZER(vps_data_free_head);
+static struct sx vps_data_free_lock;
+
+SDT_PROVIDER_DEFINE(vps);
+SDT_PROBE_DEFINE1(vps, functions, vps_alloc, entry, "int");
+SDT_PROBE_DEFINE2(vps, functions, vps_alloc, alloc, "int", "struct vps *");
+SDT_PROBE_DEFINE2(vps, functions, vps_alloc, return, "int", "struct vps *");
+SDT_PROBE_DEFINE2(vps, functions, vps_destroy, entry, "int", "struct vps *");
+SDT_PROBE_DEFINE1(vps, functions, vps_destroy, return, "int");
+
+#ifdef DDB
+static void db_show_vps_print_vs(struct vps_sysinit *, int);
+#endif
+
+/*
+ * Allocate a virtual process space.
+ */
+struct vps *
+vps_alloc(struct prison *pr)
+{
+ struct vps *vps;
+
+ SDT_PROBE1(vps, functions, vps_alloc, entry, __LINE__);
+ vps = malloc(sizeof(struct vps), M_VPS, M_WAITOK | M_ZERO);
+ vps->vps_magic_n = VPS_MAGIC_N;
+ vps->vps_state = 0;
+ vps->vps_pr = pr;
+ /* Cheat for vps_sysinit() to get creds right. */
+ pr->pr_vps = vps;
+ SDT_PROBE2(vps, functions, vps_alloc, alloc, __LINE__, vps);
+
+ /*
+ * Allocate storage for virtualized global variables and copy in
+	 * initial values from our 'primary' copy.
+ */
+ vps->vps_data_mem = malloc(VPS_SIZE, M_VPS_DATA, M_WAITOK);
+ memcpy(vps->vps_data_mem, (void *)VPS_START, VPS_BYTES);
+
+ /*
+ * All use of vps-specific data will immediately subtract VPS_START
+ * from the base memory pointer, so pre-calculate that now to avoid
+ * it on each use.
+ */
+ vps->vps_data_base = (uintptr_t)vps->vps_data_mem - VPS_START;
+
+ /* Initialize / attach vps module instances. */
+ CURVPS_SET_QUIET(vps);
+ vps_sysinit();
+ CURVPS_RESTORE();
+
+ VPS_LIST_WLOCK();
+ LIST_INSERT_HEAD(&vps_head, vps, vps_le);
+ VPS_LIST_WUNLOCK();
+
+ SDT_PROBE2(vps, functions, vps_alloc, return, __LINE__, vps);
+ return (vps);
+}
+
+/*
+ * Destroy a virtual process space.
+ */
+void
+vps_destroy(struct vps *vps)
+{
+
+ SDT_PROBE2(vps, functions, vps_destroy, entry, __LINE__, vps);
+
+ VPS_LIST_WLOCK();
+ if (vps->vps_le.le_prev == NULL && vps->vps_le.le_next == NULL) {
+ VPS_LIST_WUNLOCK();
+ DELAY(10000);
+ return;
+ }
+ LIST_REMOVE(vps, vps_le);
+ vps->vps_le.le_prev = NULL;
+ vps->vps_le.le_next = NULL;
+ VPS_LIST_WUNLOCK();
+
+ CURVPS_SET_QUIET(vps);
+ vps_sysuninit();
+ CURVPS_RESTORE();
+
+ /*
+ * Release storage for the virtual process space instance.
+ */
+ free(vps->vps_data_mem, M_VPS_DATA);
+ vps->vps_data_mem = NULL;
+ vps->vps_data_base = 0;
+ vps->vps_pr->pr_vps = NULL;
+ vps->vps_pr = NULL;
+ vps->vps_magic_n = 0xdeadbeef;
+ free(vps, M_VPS);
+ SDT_PROBE1(vps, functions, vps_destroy, return, __LINE__);
+}
+
+/*
+ * Boot time initialization and allocation of virtual process space.
+ */
+static void
+vps_init_prelink(void *arg __unused)
+{
+
+ rw_init(&vps_rwlock, "vps_rwlock");
+ sx_init(&vps_sxlock, "vps_sxlock");
+ sx_init(&vps_sysinit_sxlock, "vps_sysinit_sxlock");
+ LIST_INIT(&vps_head);
+}
+SYSINIT(vps_init_prelink, SI_SUB_VIMAGE_PRELINK, SI_ORDER_FIRST,
+ vps_init_prelink, NULL);
+
+static void
+vps0_init(void *arg __unused)
+{
+
+ if (bootverbose)
+ printf("VIMAGE (virtualized process space) enabled\n");
+
+ /*
+ * We MUST clear curvps in vi_init_done() before going SMP,
+ * otherwise CURVPS_SET() macros would scream about unnecessary
+ * curvps recursions.
+ */
+ curvps = prison0.pr_vps = vps0 = vps_alloc(&prison0);
+}
+SYSINIT(vps0_init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, vps0_init, NULL);
+
+#if 0
+/* Compared to vnets, nuking the vps of the current thread does not go down well. */
+static void
+vps_init_done(void *unused __unused)
+{
+
+ curvps = NULL;
+}
+SYSINIT(vps_init_done, SI_SUB_VIMAGE_DONE, SI_ORDER_ANY, vps_init_done, NULL);
+#endif
+
+/*
+ * Once on boot, initialize the modspace freelist to entirely cover modspace.
+ */
+static void
+vps_data_startup(void *dummy __unused)
+{
+ struct vps_data_free *df;
+
+ df = malloc(sizeof(*df), M_VPS_DATA_FREE, M_WAITOK | M_ZERO);
+ df->vnd_start = (uintptr_t)&VPS_NAME(modspace);
+ df->vnd_len = VPS_MODMIN;
+ TAILQ_INSERT_HEAD(&vps_data_free_head, df, vnd_link);
+ sx_init(&vps_data_free_lock, "vps_data alloc lock");
+}
+SYSINIT(vps_data, SI_SUB_KLD, SI_ORDER_FIRST, vps_data_startup, 0);
+
+/* Dummy VPS_SYSINIT to make sure we always reach the final end state. */
+static void
+vps_sysinit_done(void *unused __unused)
+{
+
+ return;
+}
+VPS_SYSINIT(vps_sysinit_done, SI_SUB_VIMAGE_DONE, SI_ORDER_ANY,
+ vps_sysinit_done, NULL);
+
+/*
+ * When a module is loaded and requires storage for a virtualized global
+ * variable, allocate space from the modspace free list. This interface
+ * should be used only by the kernel linker.
+ */
+void *
+vps_data_alloc(int size)
+{
+ struct vps_data_free *df;
+ void *s;
+
+ s = NULL;
+ size = roundup2(size, sizeof(void *));
+ sx_xlock(&vps_data_free_lock);
+ TAILQ_FOREACH(df, &vps_data_free_head, vnd_link) {
+ if (df->vnd_len < size)
+ continue;
+ if (df->vnd_len == size) {
+ s = (void *)df->vnd_start;
+ TAILQ_REMOVE(&vps_data_free_head, df, vnd_link);
+ free(df, M_VPS_DATA_FREE);
+ break;
+ }
+ s = (void *)df->vnd_start;
+ df->vnd_len -= size;
+ df->vnd_start = df->vnd_start + size;
+ break;
+ }
+ sx_xunlock(&vps_data_free_lock);
+
+ return (s);
+}
+
+/*
+ * Free space for a virtualized global variable on module unload.
+ */
+void
+vps_data_free(void *start_arg, int size)
+{
+	struct vps_data_free *df;
+	struct vps_data_free *dn;
+	uintptr_t start;
+	uintptr_t end;
+
+	size = roundup2(size, sizeof(void *));
+	start = (uintptr_t)start_arg;
+	end = start + size;
+	/*
+	 * Free a region of space and merge it with as many neighbors as
+	 * possible.  Keeping the list sorted simplifies this operation.
+	 */
+	sx_xlock(&vps_data_free_lock);
+	TAILQ_FOREACH(df, &vps_data_free_head, vnd_link) {
+		if (df->vnd_start > end)
+			break;
+		/*
+		 * If we expand at the end of an entry we may have to merge
+		 * it with the one following it as well.  TAILQ_NEXT() is
+		 * NULL for the last entry, so guard the dereference.
+		 */
+		if (df->vnd_start + df->vnd_len == start) {
+			df->vnd_len += size;
+			dn = TAILQ_NEXT(df, vnd_link);
+			if (dn != NULL && df->vnd_start + df->vnd_len == dn->vnd_start) {
+				df->vnd_len += dn->vnd_len;
+				TAILQ_REMOVE(&vps_data_free_head, dn,
+				    vnd_link);
+				free(dn, M_VPS_DATA_FREE);
+			}
+			sx_xunlock(&vps_data_free_lock);
+			return;
+		}
+		if (df->vnd_start == end) {
+			df->vnd_start = start;
+			df->vnd_len += size;
+			sx_xunlock(&vps_data_free_lock);
+			return;
+		}
+	}
+	dn = malloc(sizeof(*df), M_VPS_DATA_FREE, M_WAITOK | M_ZERO);
+	dn->vnd_start = start;
+	dn->vnd_len = size;
+	if (df)
+		TAILQ_INSERT_BEFORE(df, dn, vnd_link);
+	else
+		TAILQ_INSERT_TAIL(&vps_data_free_head, dn, vnd_link);
+	sx_xunlock(&vps_data_free_lock);
+}
+
+/*
+ * When a new virtualized global variable has been allocated, propagate its
+ * initial value to each already-allocated virtual process space instance.
+ */
+void
+vps_data_copy(void *start, int size)
+{
+ struct vps *vps;
+
+ VPS_LIST_RLOCK();
+ LIST_FOREACH(vps, &vps_head, vps_le)
+ memcpy((void *)((uintptr_t)vps->vps_data_base +
+ (uintptr_t)start), start, size);
+ VPS_LIST_RUNLOCK();
+}
+
+/*
+ * Support for special SYSINIT handlers registered via VPS_SYSINIT()
+ * and VPS_SYSUNINIT().
+ */
+void
+vps_register_sysinit(void *arg)
+{
+ struct vps_sysinit *vs, *vs2;
+ struct vps *vps;
+
+ vs = arg;
+ KASSERT(vs->subsystem >= SI_SUB_INTRINSIC, ("vps sysinit too early"));
+
+ /* Add the constructor to the global list of vps constructors. */
+ VPS_SYSINIT_WLOCK();
+ TAILQ_FOREACH(vs2, &vps_constructors, link) {
+ if (vs2->subsystem > vs->subsystem)
+ break;
+ if (vs2->subsystem == vs->subsystem && vs2->order > vs->order)
+ break;
+ }
+ if (vs2 != NULL)
+ TAILQ_INSERT_BEFORE(vs2, vs, link);
+ else
+ TAILQ_INSERT_TAIL(&vps_constructors, vs, link);
+
+ /*
+ * Invoke the constructor on all the existing vpss when it is
+ * registered.
+ */
+ VPS_FOREACH(vps) {
+ CURVPS_SET_QUIET(vps);
+ vs->func(vs->arg);
+ CURVPS_RESTORE();
+ }
+ VPS_SYSINIT_WUNLOCK();
+}
+
+void
+vps_deregister_sysinit(void *arg)
+{
+ struct vps_sysinit *vs;
+
+ vs = arg;
+
+ /* Remove the constructor from the global list of vps constructors. */
+ VPS_SYSINIT_WLOCK();
+ TAILQ_REMOVE(&vps_constructors, vs, link);
+ VPS_SYSINIT_WUNLOCK();
+}
+
+void
+vps_register_sysuninit(void *arg)
+{
+ struct vps_sysinit *vs, *vs2;
+
+ vs = arg;
+
+ /* Add the destructor to the global list of vps destructors. */
+ VPS_SYSINIT_WLOCK();
+ TAILQ_FOREACH(vs2, &vps_destructors, link) {
+ if (vs2->subsystem > vs->subsystem)
+ break;
+ if (vs2->subsystem == vs->subsystem && vs2->order > vs->order)
+ break;
+ }
+ if (vs2 != NULL)
+ TAILQ_INSERT_BEFORE(vs2, vs, link);
+ else
+ TAILQ_INSERT_TAIL(&vps_destructors, vs, link);
+ VPS_SYSINIT_WUNLOCK();
+}
+
+void
+vps_deregister_sysuninit(void *arg)
+{
+ struct vps_sysinit *vs;
+ struct vps *vps;
+
+ vs = arg;
+
+ /*
+ * Invoke the destructor on all the existing vpss when it is
+ * deregistered.
+ */
+ VPS_SYSINIT_WLOCK();
+ VPS_FOREACH(vps) {
+ CURVPS_SET_QUIET(vps);
+ vs->func(vs->arg);
+ CURVPS_RESTORE();
+ }
+
+ /* Remove the destructor from the global list of vps destructors. */
+ TAILQ_REMOVE(&vps_destructors, vs, link);
+ VPS_SYSINIT_WUNLOCK();
+}
+
+/*
+ * Invoke all registered vps constructors on the current vps. Used during
+ * vps construction. The caller is responsible for ensuring the new vps is
+ * the current vps and that the vps_sysinit_sxlock lock is locked.
+ */
+void
+vps_sysinit(void)
+{
+ struct vps_sysinit *vs;
+ struct vps *vps;
+
+ vps = curvps;
+ VPS_SYSINIT_RLOCK();
+ TAILQ_FOREACH(vs, &vps_constructors, link) {
+ curvps->vps_state = vs->subsystem;
+ vs->func(vs->arg);
+ KASSERT((curvps == vps),
+ ("%s: vs %p subsystem %u order %u func %p returned "
+ "with curvps altered: curvps %p should be %p\n",
+ __func__, vs, vs->subsystem, vs->order, vs->func,
+ curvps, vps));
+ }
+ VPS_SYSINIT_RUNLOCK();
+}
+
+/*
+ * Invoke all registered vps destructors on the current vps. Used during
+ * vps destruction. The caller is responsible for ensuring the dying vps
+ * is the current vps and that the vps_sysinit_sxlock lock is locked.
+ */
+void
+vps_sysuninit(void)
+{
+ struct vps_sysinit *vs;
+
+ VPS_SYSINIT_RLOCK();
+ TAILQ_FOREACH_REVERSE(vs, &vps_destructors, vps_sysuninit_head,
+ link) {
+ curvps->vps_state = vs->subsystem;
+ vs->func(vs->arg);
+ }
+ VPS_SYSINIT_RUNLOCK();
+}
+
+/*
+ * EVENTHANDLER(9) extensions.
+ */
+/*
+ * Invoke the eventhandler function originally registered with the possibly
+ * registered argument for all virtual process space instances.
+ *
+ * This iterator can only be used for eventhandlers that do not take any
+ * additional arguments, as we do ignore the variadic arguments from the
+ * EVENTHANDLER_INVOKE() call.
+ */
+void
+vps_global_eventhandler_iterator_func(void *arg, ...)
+{
+ VPS_ITERATOR_DECL(vps_iter);
+ struct eventhandler_entry_vimage *v_ee;
+
+ /*
+ * There is a bug here in that we should actually cast things to
+ * (struct eventhandler_entry_ ## name *) but that's not easily
+ * possible in here so just re-using the variadic version we
+ * defined for the generic vimage case.
+ */
+ v_ee = arg;
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ ((vimage_iterator_func_t)v_ee->func)(v_ee->ee_arg);
+ CURVPS_RESTORE();
+ }
+ VPS_LIST_RUNLOCK();
+}
+
+#ifdef VPS_DEBUG
+struct vps_recursion {
+ SLIST_ENTRY(vps_recursion) vnr_le;
+ const char *prev_fn;
+ const char *where_fn;
+ int where_line;
+ struct vps *old_vps;
+ struct vps *new_vps;
+};
+
+static SLIST_HEAD(, vps_recursion) vps_recursions =
+ SLIST_HEAD_INITIALIZER(vps_recursions);
+
+static void
+vps_print_recursion(struct vps_recursion *vnr, int brief)
+{
+
+ if (!brief)
+ printf("CURVPS_SET() recursion in ");
+ printf("%s() line %d, prev in %s()", vnr->where_fn, vnr->where_line,
+ vnr->prev_fn);
+ if (brief)
+ printf(", ");
+ else
+ printf("\n ");
+ printf("%p -> %p\n", vnr->old_vps, vnr->new_vps);
+}
+
+void
+vps_log_recursion(struct vps *old_vps, const char *old_fn, int line)
+{
+ struct vps_recursion *vnr;
+
+ /* Skip already logged recursion events. */
+ SLIST_FOREACH(vnr, &vps_recursions, vnr_le)
+ if (vnr->prev_fn == old_fn &&
+ vnr->where_fn == curthread->td_vps_lpush &&
+ vnr->where_line == line &&
+ (vnr->old_vps == vnr->new_vps) == (curvps == old_vps))
+ return;
+
+ vnr = malloc(sizeof(*vnr), M_VPS, M_NOWAIT | M_ZERO);
+ if (vnr == NULL)
+ panic("%s: malloc failed", __func__);
+ vnr->prev_fn = old_fn;
+ vnr->where_fn = curthread->td_vps_lpush;
+ vnr->where_line = line;
+ vnr->old_vps = old_vps;
+ vnr->new_vps = curvps;
+
+ SLIST_INSERT_HEAD(&vps_recursions, vnr, vnr_le);
+
+ vps_print_recursion(vnr, 0);
+#ifdef KDB
+ kdb_backtrace();
+#endif
+}
+#endif /* VPS_DEBUG */
+
+/*
+ * DDB(4).
+ */
+#ifdef DDB
+static void
+db_vps_print(struct vps *vps)
+{
+
+ db_printf("vps = %p\n", vps);
+ db_printf(" vps_magic_n = %#08x (%s, orig %#08x)\n",
+ vps->vps_magic_n,
+ (vps->vps_magic_n == VPS_MAGIC_N) ?
+ "ok" : "mismatch", VPS_MAGIC_N);
+ db_printf(" vps_data_mem = %p\n", vps->vps_data_mem);
+ db_printf(" vps_data_base = %#jx\n",
+ (uintmax_t)vps->vps_data_base);
+ db_printf(" vps_state = %#08x\n", vps->vps_state);
+ db_printf("\n");
+}
+
+DB_SHOW_ALL_COMMAND(vpss, db_show_all_vpss)
+{
+ VPS_ITERATOR_DECL(vps_iter);
+
+ VPS_FOREACH(vps_iter) {
+ db_vps_print(vps_iter);
+ if (db_pager_quit)
+ break;
+ }
+}
+
+DB_SHOW_COMMAND(vps, db_show_vps)
+{
+
+ if (!have_addr) {
+ db_printf("usage: show vps <struct vps *>\n");
+ return;
+ }
+
+ db_vps_print((struct vps *)addr);
+}
+
+static void
+db_show_vps_print_vs(struct vps_sysinit *vs, int ddb)
+{
+ const char *vsname, *funcname;
+ c_db_sym_t sym;
+ db_expr_t offset;
+
+#define xprint(...) \
+ if (ddb) \
+ db_printf(__VA_ARGS__); \
+ else \
+ printf(__VA_ARGS__)
+
+ if (vs == NULL) {
+ xprint("%s: no vps_sysinit * given\n", __func__);
+ return;
+ }
+
+ sym = db_search_symbol((vm_offset_t)vs, DB_STGY_ANY, &offset);
+ db_symbol_values(sym, &vsname, NULL);
+ sym = db_search_symbol((vm_offset_t)vs->func, DB_STGY_PROC, &offset);
+ db_symbol_values(sym, &funcname, NULL);
+ xprint("%s(%p)\n", (vsname != NULL) ? vsname : "", vs);
+ xprint(" %#08x %#08x\n", vs->subsystem, vs->order);
+ xprint(" %p(%s)(%p)\n",
+ vs->func, (funcname != NULL) ? funcname : "", vs->arg);
+#undef xprint
+}
+
+DB_SHOW_COMMAND(vps_sysinit, db_show_vps_sysinit)
+{
+ struct vps_sysinit *vs;
+
+ db_printf("VPS_SYSINIT vs Name(Ptr)\n");
+ db_printf(" Subsystem Order\n");
+ db_printf(" Function(Name)(Arg)\n");
+ TAILQ_FOREACH(vs, &vps_constructors, link) {
+ db_show_vps_print_vs(vs, 1);
+ if (db_pager_quit)
+ break;
+ }
+}
+
+DB_SHOW_COMMAND(vps_sysuninit, db_show_vps_sysuninit)
+{
+ struct vps_sysinit *vs;
+
+ db_printf("VPS_SYSUNINIT vs Name(Ptr)\n");
+ db_printf(" Subsystem Order\n");
+ db_printf(" Function(Name)(Arg)\n");
+ TAILQ_FOREACH_REVERSE(vs, &vps_destructors, vps_sysuninit_head,
+ link) {
+ db_show_vps_print_vs(vs, 1);
+ if (db_pager_quit)
+ break;
+ }
+}
+
+DB_COMMAND(setcurvps, db_setcurvps)
+{
+	struct vps *vps;
+
+	if (!have_addr) {
+		/* Fixed typo: "stauct" -> "struct" in the usage message. */
+		db_printf("usage: setcurvps <struct vps *>\n");
+		return;
+	}
+
+	vps = (struct vps *)addr;
+	db_printf("curvps %p -> %p\n", curvps, vps);
+	curvps = vps;
+	db_vps_print(vps);
+}
+
+#ifdef VPS_DEBUG
+DB_SHOW_COMMAND(vpsrcrs, db_show_vpsrcrs)
+{
+ struct vps_recursion *vnr;
+
+ SLIST_FOREACH(vnr, &vps_recursions, vnr_le)
+ vps_print_recursion(vnr, 1);
+}
+#endif
+#endif /* DDB */
Index: sys/kern/sched_4bsd.c
===================================================================
--- sys/kern/sched_4bsd.c
+++ sys/kern/sched_4bsd.c
@@ -454,17 +454,15 @@
* Recompute process priorities, every hz ticks.
* MP-safe, called without the Giant mutex.
*/
-/* ARGSUSED */
-static void
-schedcpu(void)
+static __inline void
+_schedcpu(fixpt_t loadfac)
{
- fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
struct thread *td;
struct proc *p;
struct td_sched *ts;
int awake;
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW) {
@@ -550,7 +548,22 @@
}
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
+}
+
+static void
+schedcpu(void)
+{
+ VPS_ITERATOR_DECL(vps_iter);
+ fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
+
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ _schedcpu(loadfac);
+ CURVPS_RESTORE();
+ }
+ VPS_LIST_RUNLOCK();
}
/*
Index: sys/kern/subr_pcpu.c
===================================================================
--- sys/kern/subr_pcpu.c
+++ sys/kern/subr_pcpu.c
@@ -378,6 +378,7 @@
#ifdef VIMAGE
db_printf("curvnet = %p\n", pc->pc_curthread->td_vnet);
+ db_printf("curvps = %p\n", pc->pc_curthread->td_vps);
#endif
#ifdef WITNESS
Index: sys/kern/subr_prf.c
===================================================================
--- sys/kern/subr_prf.c
+++ sys/kern/subr_prf.c
@@ -165,12 +165,12 @@
if (TD_IS_IDLETHREAD(td))
return (0);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
p = td->td_proc;
PROC_LOCK(p);
if ((p->p_flag & P_CONTROLT) == 0) {
PROC_UNLOCK(p);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (0);
}
SESS_LOCK(p->p_session);
@@ -178,14 +178,14 @@
SESS_UNLOCK(p->p_session);
PROC_UNLOCK(p);
if (pca.tty == NULL) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (0);
}
pca.flags = TOTTY;
pca.p_bufr = NULL;
va_start(ap, fmt);
tty_lock(pca.tty);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
retval = kvprintf(fmt, putchar, &pca, 10, ap);
tty_unlock(pca.tty);
va_end(ap);
@@ -214,7 +214,7 @@
struct putchar_arg pca;
struct session *sess = NULL;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (pri != -1)
flags |= TOLOG;
if (p != NULL) {
@@ -237,7 +237,7 @@
pca.p_bufr = NULL;
if (pca.tty != NULL)
tty_lock(pca.tty);
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
kvprintf(fmt, putchar, &pca, 10, ap);
if (pca.tty != NULL)
tty_unlock(pca.tty);
Index: sys/kern/subr_turnstile.c
===================================================================
--- sys/kern/subr_turnstile.c
+++ sys/kern/subr_turnstile.c
@@ -1212,22 +1212,32 @@
DB_SHOW_ALL_COMMAND(chains, db_show_allchains)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct thread *td;
struct proc *p;
int i;
i = 1;
- FOREACH_PROC_IN_SYSTEM(p) {
- FOREACH_THREAD_IN_PROC(p, td) {
- if ((TD_ON_LOCK(td) && LIST_EMPTY(&td->td_contested))
- || (TD_IS_INHIBITED(td) && TD_ON_SLEEPQ(td))) {
- db_printf("chain %d:\n", i++);
- print_lockchain(td, " ");
+
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if ((TD_ON_LOCK(td) &&
+ LIST_EMPTY(&td->td_contested))
+ || (TD_IS_INHIBITED(td) &&
+ TD_ON_SLEEPQ(td))) {
+ db_printf("chain %d:\n", i++);
+ print_lockchain(td, " ");
+ }
+ if (db_pager_quit)
+ return;
}
- if (db_pager_quit)
- return;
}
+ CURVPS_RESTORE();
}
+ /* VPS_LIST_RUNLOCK(); */
}
DB_SHOW_ALIAS(allchains, db_show_allchains)
Index: sys/kern/subr_witness.c
===================================================================
--- sys/kern/subr_witness.c
+++ sys/kern/subr_witness.c
@@ -2534,6 +2534,7 @@
DB_SHOW_ALL_COMMAND(locks, db_witness_list_all)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct thread *td;
struct proc *p;
@@ -2542,19 +2543,25 @@
* held sleep locks, but that information is currently not exported
* by WITNESS.
*/
- FOREACH_PROC_IN_SYSTEM(p) {
- if (!witness_proc_has_locks(p))
- continue;
- FOREACH_THREAD_IN_PROC(p, td) {
- if (!witness_thread_has_locks(td))
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (!witness_proc_has_locks(p))
continue;
- db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid,
- p->p_comm, td, td->td_tid);
- witness_ddb_list(td);
- if (db_pager_quit)
- return;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ if (!witness_thread_has_locks(td))
+ continue;
+ db_printf("Process %d (%s) thread %p (%d)\n",
+ p->p_pid, p->p_comm, td, td->td_tid);
+ witness_ddb_list(td);
+ if (db_pager_quit)
+ return;
+ }
}
+ CURVPS_RESTORE();
}
+ /* VPS_LIST_RUNLOCK(); */
}
DB_SHOW_ALIAS(alllocks, db_witness_list_all)
Index: sys/kern/sys_procdesc.c
===================================================================
--- sys/kern/sys_procdesc.c
+++ sys/kern/sys_procdesc.c
@@ -153,13 +153,13 @@
goto out;
}
pd = fp->f_data;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (pd->pd_proc != NULL) {
*p = pd->pd_proc;
PROC_LOCK(*p);
} else
error = ESRCH;
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
out:
fdrop(fp, td);
return (error);
@@ -305,14 +305,14 @@
{
struct procdesc *pd;
- sx_assert(&proctree_lock, SA_XLOCKED);
+ sx_assert(&V_proctree_lock, SA_XLOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
KASSERT(p->p_procdesc != NULL, ("procdesc_exit: p_procdesc NULL"));
pd = p->p_procdesc;
PROCDESC_LOCK(pd);
- KASSERT((pd->pd_flags & PDF_CLOSED) == 0 || p->p_pptr == initproc,
+ KASSERT((pd->pd_flags & PDF_CLOSED) == 0 || p->p_pptr == V_initproc,
("procdesc_exit: closed && parent not init"));
pd->pd_flags |= PDF_EXITED;
@@ -349,7 +349,7 @@
{
struct procdesc *pd;
- sx_assert(&proctree_lock, SA_XLOCKED);
+ sx_assert(&V_proctree_lock, SA_XLOCKED);
KASSERT(p->p_procdesc != NULL, ("procdesc_reap: p_procdesc == NULL"));
pd = p->p_procdesc;
@@ -375,7 +375,7 @@
fp->f_ops = &badfileops;
fp->f_data = NULL;
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
PROCDESC_LOCK(pd);
pd->pd_flags |= PDF_CLOSED;
PROCDESC_UNLOCK(pd);
@@ -385,7 +385,7 @@
* This is the case where process' exit status was already
* collected and procdesc_reap() was already called.
*/
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
} else {
PROC_LOCK(p);
AUDIT_ARG_PROCESS(p);
@@ -415,11 +415,11 @@
* prejudice.
*/
p->p_sigparent = SIGCHLD;
- proc_reparent(p, initproc);
+ proc_reparent(p, V_initproc);
if ((pd->pd_flags & PDF_DAEMON) == 0)
kern_psignal(p, SIGKILL);
PROC_UNLOCK(p);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
}
}
@@ -531,7 +531,7 @@
*/
bzero(sb, sizeof(*sb));
pd = fp->f_data;
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
if (pd->pd_proc != NULL) {
PROC_LOCK(pd->pd_proc);
AUDIT_ARG_PROCESS(pd->pd_proc);
@@ -553,7 +553,7 @@
PROC_UNLOCK(pd->pd_proc);
} else
sb->st_mode = S_IFREG;
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (0);
}
Index: sys/kern/sys_process.c
===================================================================
--- sys/kern/sys_process.c
+++ sys/kern/sys_process.c
@@ -688,7 +688,7 @@
proc_set_traced(struct proc *p, bool stop)
{
- sx_assert(&proctree_lock, SX_XLOCKED);
+ sx_assert(&V_proctree_lock, SX_XLOCKED);
PROC_LOCK_ASSERT(p, MA_OWNED);
p->p_flag |= P_TRACED;
if (stop)
@@ -733,7 +733,7 @@
case PT_SET_EVENT_MASK:
case PT_DETACH:
case PT_GET_SC_ARGS:
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
proctree_locked = 1;
break;
default:
@@ -747,14 +747,14 @@
if (pid <= PID_MAX) {
if ((p = pfind(pid)) == NULL) {
if (proctree_locked)
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (ESRCH);
}
} else {
td2 = tdfind(pid, -1);
if (td2 == NULL) {
if (proctree_locked)
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (ESRCH);
}
p = td2->td_proc;
@@ -816,7 +816,7 @@
error = EBUSY;
goto fail;
}
- if (p->p_pptr == initproc) {
+ if (p->p_pptr == V_initproc) {
error = EPERM;
goto fail;
}
@@ -923,7 +923,7 @@
CTR2(KTR_PTRACE, "PT_ATTACH: pid %d, oppid %d", p->p_pid,
p->p_oppid);
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
proctree_locked = 0;
MPASS(p->p_xthread == NULL);
MPASS((p->p_flag & P_STOPPED_TRACE) == 0);
@@ -1113,7 +1113,7 @@
pp = proc_realparent(p);
proc_reparent(p, pp);
- if (pp == initproc)
+ if (pp == V_initproc)
p->p_sigparent = SIGCHLD;
CTR3(KTR_PTRACE,
"PT_DETACH: pid %d reparented to pid %d, sig %d",
@@ -1142,7 +1142,7 @@
break;
}
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
proctree_locked = 0;
sendsig:
@@ -1456,7 +1456,7 @@
fail:
PROC_UNLOCK(p);
if (proctree_locked)
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (error);
}
#undef PROC_READ
Index: sys/kern/tty.c
===================================================================
--- sys/kern/tty.c
+++ sys/kern/tty.c
@@ -1703,18 +1703,18 @@
/* XXX: This looks awful. */
tty_unlock(tp);
- sx_xlock(&proctree_lock);
+ sx_xlock(&V_proctree_lock);
tty_lock(tp);
if (!SESS_LEADER(p)) {
/* Only the session leader may do this. */
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (EPERM);
}
if (tp->t_session != NULL && tp->t_session == p->p_session) {
/* This is already our controlling TTY. */
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (0);
}
@@ -1732,7 +1732,7 @@
* TTYs of which the session leader has been
* killed or the TTY revoked.
*/
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
return (EPERM);
}
@@ -1740,7 +1740,7 @@
tp->t_session = p->p_session;
tp->t_session->s_ttyp = tp;
tp->t_sessioncnt++;
- sx_xunlock(&proctree_lock);
+ sx_xunlock(&V_proctree_lock);
/* Assign foreground process group. */
tp->t_pgrp = p->p_pgrp;
@@ -1759,12 +1759,12 @@
* decompose proctree_lock.
*/
tty_unlock(tp);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
pg = pgfind(*(int *)data);
if (pg != NULL)
PGRP_UNLOCK(pg);
if (pg == NULL || pg->pg_session != td->td_proc->p_session) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
tty_lock(tp);
return (EPERM);
}
@@ -1775,11 +1775,11 @@
* relocking the TTY.
*/
if (!tty_is_ctty(tp, td->td_proc)) {
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
return (ENOTTY);
}
tp->t_pgrp = pg;
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
/* Wake up the background process groups. */
cv_broadcast(&tp->t_bgwait);
Index: sys/kern/tty_tty.c
===================================================================
--- sys/kern/tty_tty.c
+++ sys/kern/tty_tty.c
@@ -68,7 +68,7 @@
return;
p = curproc;
sx_sunlock(&clone_drain_lock);
- sx_slock(&proctree_lock);
+ sx_slock(&V_proctree_lock);
sx_slock(&clone_drain_lock);
dev_lock();
if (!(p->p_flag & P_CONTROLT))
@@ -83,7 +83,7 @@
*dev = p->p_session->s_ttyvp->v_rdev;
dev_refl(*dev);
dev_unlock();
- sx_sunlock(&proctree_lock);
+ sx_sunlock(&V_proctree_lock);
}
static void
Index: sys/net/vnet.c
===================================================================
--- sys/net/vnet.c
+++ sys/net/vnet.c
@@ -80,8 +80,6 @@
* stack instance.
*/
-FEATURE(vimage, "VIMAGE kernel virtualization");
-
static MALLOC_DEFINE(M_VNET, "vnet", "network stack control block");
/*
@@ -307,7 +305,7 @@
sx_init(&vnet_sysinit_sxlock, "vnet_sysinit_sxlock");
LIST_INIT(&vnet_head);
}
-SYSINIT(vnet_init_prelink, SI_SUB_VNET_PRELINK, SI_ORDER_FIRST,
+SYSINIT(vnet_init_prelink, SI_SUB_VIMAGE_PRELINK, SI_ORDER_FIRST,
vnet_init_prelink, NULL);
static void
Index: sys/sys/jail.h
===================================================================
--- sys/sys/jail.h
+++ sys/sys/jail.h
@@ -166,6 +166,7 @@
struct osd pr_osd; /* (p) additional data */
struct cpuset *pr_cpuset; /* (p) cpuset */
struct vnet *pr_vnet; /* (c) network stack */
+ struct vps *pr_vps; /* (c) process space */
struct vnode *pr_root; /* (c) vnode to rdir */
int pr_ip4s; /* (p) number of v4 IPs */
int pr_ip6s; /* (p) number of v6 IPs */
@@ -209,6 +210,7 @@
/* primary jail address. */
#define PR_IP6_SADDRSEL 0x00000100 /* Do IPv6 src addr sel. or use the */
/* primary jail address. */
+#define PR_VPS 0x00000200 /* Virtual process space */
/* Internal flag bits */
#define PR_IP4 0x02000000 /* IPv4 restricted or disabled */
@@ -370,6 +372,7 @@
int prison_allow(struct ucred *, unsigned);
int prison_check(struct ucred *cred1, struct ucred *cred2);
int prison_owns_vnet(struct ucred *);
+int prison_owns_vps(struct ucred *);
int prison_canseemount(struct ucred *cred, struct mount *mp);
void prison_enforce_statfs(struct ucred *cred, struct mount *mp,
struct statfs *sp);
Index: sys/sys/kernel.h
===================================================================
--- sys/sys/kernel.h
+++ sys/sys/kernel.h
@@ -102,7 +102,7 @@
SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */
SI_SUB_LOCK = 0x1B00000, /* various locks */
SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */
- SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
+ SI_SUB_VIMAGE_PRELINK = 0x1E00000, /* VIMAGE init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
SI_SUB_RACCT = 0x2110000, /* resource accounting */
@@ -159,7 +159,7 @@
SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */
SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/
SI_SUB_SYSCALLS = 0xd800000, /* register system calls */
- SI_SUB_VNET_DONE = 0xdc00000, /* vnet registration complete */
+ SI_SUB_VNET_DONE = 0xdc00000, /* VNET registration complete */
SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/
SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/
SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/
@@ -170,6 +170,7 @@
SI_SUB_SMP = 0xf000000, /* start the APs*/
#endif
SI_SUB_RACCTD = 0xf100000, /* start racctd*/
+ SI_SUB_VIMAGE_DONE = 0xf800000, /* VIMAGE initialization done */
SI_SUB_LAST = 0xfffffff /* final initialization */
};
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -68,6 +68,9 @@
#include <sys/ucred.h>
#include <sys/types.h>
#include <sys/_domainset.h>
+#ifdef _KERNEL
+#include <sys/vps.h>
+#endif
#include <machine/proc.h> /* Machine-dependent proc substruct. */
#ifdef _KERNEL
@@ -351,6 +354,8 @@
/* LP64 hole */
struct vnet *td_vnet; /* (k) Effective vnet. */
const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */
+ struct vps *td_vps; /* (k) Effective vps. */
+ const char *td_vps_lpush; /* (k) Debugging vps push / pop. */
struct trapframe *td_intr_frame;/* (k) Frame of the current irq */
struct proc *td_rfppwait_p; /* (k) The vforked child */
struct vm_page **td_ma; /* (k) uio pages held */
@@ -809,7 +814,7 @@
#endif
#define FOREACH_PROC_IN_SYSTEM(p) \
- LIST_FOREACH((p), &allproc, p_list)
+ LIST_FOREACH((p), &V_allproc, p_list)
#define FOREACH_THREAD_IN_PROC(p, td) \
TAILQ_FOREACH((td), &(p)->p_threads, td_plist)
@@ -939,38 +944,61 @@
#define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0)
-#define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash])
-extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
-extern u_long pidhash;
-#define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash])
+LIST_HEAD(pidhashhead, proc);
+VPS_DECLARE(struct pidhashhead *, pidhashtbl);
+#define V_pidhashtbl VPS(pidhashtbl)
+VPS_DECLARE(u_long, pidhash);
+#define V_pidhash VPS(pidhash)
+#define PIDHASH(pid) (&V_pidhashtbl[(pid) & V_pidhash])
+
extern LIST_HEAD(tidhashhead, thread) *tidhashtbl;
extern u_long tidhash;
+#define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash])
extern struct rwlock tidhash_lock;
-#define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash])
-extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
-extern u_long pgrphash;
+LIST_HEAD(pgrphashhead, pgrp);
+VPS_DECLARE(struct pgrphashhead *, pgrphashtbl);
+#define V_pgrphashtbl VPS(pgrphashtbl)
+VPS_DECLARE(u_long, pgrphash);
+#define V_pgrphash VPS(pgrphash)
+#define PGRPHASH(pgid) (&V_pgrphashtbl[(pgid) & V_pgrphash])
-extern struct sx allproc_lock;
+VPS_DECLARE(struct sx, allproc_lock);
+#define V_allproc_lock VPS(allproc_lock)
extern int allproc_gen;
-extern struct sx proctree_lock;
-extern struct mtx ppeers_lock;
+VPS_DECLARE(struct sx, proctree_lock);
+#define V_proctree_lock VPS(proctree_lock)
+VPS_DECLARE(struct mtx, ppeers_lock);
+#define V_ppeers_lock VPS(ppeers_lock)
extern struct proc proc0; /* Process slot for swapper. */
extern struct thread0_storage thread0_st; /* Primary thread in proc0. */
#define thread0 (thread0_st.t0st_thread)
extern struct vmspace vmspace0; /* VM space for proc0. */
+VPS_DECLARE(struct proc *, vproc0);
+#define V_vproc0 VPS(vproc0)
+#ifdef VIMAGE
+VPS_DECLARE(int, vpsdying);
+#define V_vpsdying VPS(vpsdying)
+#endif
extern int hogticks; /* Limit on kernel cpu hogs. */
-extern int lastpid;
-extern int nprocs, maxproc; /* Current and max number of procs. */
+VPS_DECLARE(int, lastpid);
+#define V_lastpid VPS(lastpid)
+VPS_DECLARE(int, nprocs); /* Current number of procs. */
+#define V_nprocs VPS(nprocs)
+extern int maxproc; /* Max number of procs. */
extern int maxprocperuid; /* Max procs per uid. */
extern u_long ps_arg_cache_limit;
LIST_HEAD(proclist, proc);
TAILQ_HEAD(procqueue, proc);
TAILQ_HEAD(threadqueue, thread);
-extern struct proclist allproc; /* List of all processes. */
-extern struct proclist zombproc; /* List of zombie processes. */
-extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */
+VPS_DECLARE(struct proclist, allproc); /* List of all processes. */
+#define V_allproc VPS(allproc)
+VPS_DECLARE(struct proclist, zombproc); /* List of zombie processes. */
+#define V_zombproc VPS(zombproc)
+VPS_DECLARE(struct proc *, initproc); /* Process slots for init. */
+#define V_initproc VPS(initproc)
+extern struct proc *pageproc; /* Process slot for pager. */
extern struct uma_zone *proc_zone;
@@ -1021,6 +1049,7 @@
int fork1(struct thread *, struct fork_req *);
void fork_exit(void (*)(void *, struct trapframe *), void *,
struct trapframe *);
+int fork_findpid(int);
void fork_return(struct thread *, struct trapframe *);
int inferior(struct proc *p);
void kern_proc_vmmap_resident(struct vm_map *map, struct vm_map_entry *entry,
@@ -1043,7 +1072,6 @@
int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb);
int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb);
int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb);
-void procinit(void);
void proc_linkup0(struct proc *p, struct thread *td);
void proc_linkup(struct proc *p, struct thread *td);
struct proc *proc_realparent(struct proc *child);
Index: sys/sys/resourcevar.h
===================================================================
--- sys/sys/resourcevar.h
+++ sys/sys/resourcevar.h
@@ -154,7 +154,6 @@
struct uidinfo
*uifind(uid_t uid);
void uifree(struct uidinfo *uip);
-void uihashinit(void);
void uihold(struct uidinfo *uip);
#ifdef RACCT
void ui_racct_foreach(void (*callback)(struct racct *racct,
Index: sys/sys/sysctl.h
===================================================================
--- sys/sys/sysctl.h
+++ sys/sys/sysctl.h
@@ -104,6 +104,7 @@
#define CTLFLAG_CAPWR 0x00004000 /* Can be written in capability mode */
#define CTLFLAG_STATS 0x00002000 /* Statistics, not a tuneable */
#define CTLFLAG_NOFETCH 0x00001000 /* Don't fetch tunable from getenv() */
+#define CTLFLAG_VPS 0x00000800 /* Prisons with vps can fiddle */
#define CTLFLAG_CAPRW (CTLFLAG_CAPRD|CTLFLAG_CAPWR)
/*
Index: sys/sys/vps.h
===================================================================
--- /dev/null
+++ sys/sys/vps.h
@@ -0,0 +1,381 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2006-2009 University of Zagreb
+ * Copyright (c) 2006-2009 FreeBSD Foundation
+ * Copyright (c) 2018 iXsystems, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by the University of Zagreb and the
+ * FreeBSD Foundation under sponsorship by the Stichting NLnet and the
+ * FreeBSD Foundation.
+ *
+ * Portions of this software were developed by Bjoern Zeeb
+ * under sponsorship from iXsystems, Inc.
+ *
+ * Copyright (c) 2009 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2009 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*-
+ * This header file defines several sets of interfaces supporting virtualized
+ * process space:
+ *
+ * - Definition of 'struct vps' and functions and macros to allocate/free/
+ * manipulate it.
+ *
+ * - A virtual process space memory allocator, which provides support for
+ * virtualized global variables via a special linker set, set_vps.
+ *
+ * - Virtualized sysinits/sysuninits, which allow constructors and
+ * destructors to be run for each process space as virtual
+ * instances are created and destroyed.
+ *
+ * If VIMAGE isn't compiled into the kernel, virtualized global variables
+ * compile to normal global variables, and virtualized sysinits to regular
+ * sysinits.
+ */
+
+#ifndef _SYS_VPS_H_
+#define _SYS_VPS_H_
+
+/*
+ * struct vps describes a virtualized process space, and is primarily a
+ * pointer to storage for virtualized global variables. Expose to userspace
+ * as required for libkvm.
+ */
+#if defined(_KERNEL) || defined(_WANT_VPS)
+#include <sys/queue.h>
+
+struct vps {
+ LIST_ENTRY(vps) vps_le; /* all vps list */
+ u_int vps_magic_n;
+ u_int vps_state; /* SI_SUB_* */
+ void *vps_data_mem;
+ uintptr_t vps_data_base;
+ struct prison *vps_pr; /* Put init on this if set. */
+};
+#define VPS_MAGIC_N 0x0f0307e2
+
+/*
+ * These two virtual process space allocator definitions are also required
+ * for libkvm so that it can evaluate virtualized global variables.
+ */
+#define VPS_SETNAME "set_vps"
+#define VPS_SYMPREFIX "vps_entry_"
+#endif
+
+#ifdef _KERNEL
+#ifdef VIMAGE
+#include <sys/lock.h>
+#include <sys/proc.h> /* for struct thread */
+#include <sys/rwlock.h>
+#include <sys/sx.h>
+
+/*
+ * Location of the kernel's 'set_vps' linker set.
+ */
+extern uintptr_t *__start_set_vps;
+__GLOBL(__start_set_vps);
+extern uintptr_t *__stop_set_vps;
+__GLOBL(__stop_set_vps);
+
+#define VPS_START (uintptr_t)&__start_set_vps
+#define VPS_STOP (uintptr_t)&__stop_set_vps
+
+/*
+ * Functions to allocate and destroy virtual process spaces.
+ */
+struct vps *vps_alloc(struct prison *);
+void vps_destroy(struct vps *);
+
+/*
+ * The current virtual process space -- we may wish to move this to struct
+ * pcpu in the future.
+ */
+#define curvps curthread->td_vps
+
+/*
+ * Various macros -- get and set the current process space, but also
+ * assertions.
+ */
+#if defined(INVARIANTS) || defined(VPS_DEBUG)
+#define VPS_ASSERT(exp, msg) do { \
+ if (!(exp)) \
+ panic msg; \
+} while (0)
+#else
+#define VPS_ASSERT(exp, msg) do { \
+} while (0)
+#endif
+
+#ifdef VPS_DEBUG
+void vps_log_recursion(struct vps *, const char *, int);
+
+#define CURVPS_SET_QUIET(arg) \
+ VPS_ASSERT((arg) != NULL && (arg)->vps_magic_n == VPS_MAGIC_N, \
+ ("CURVPS_SET at %s:%d %s() curvps=%p vps=%p", \
+ __FILE__, __LINE__, __func__, curvps, (arg))); \
+ struct vps *saved_vps = curvps; \
+ const char *saved_vps_lpush = curthread->td_vps_lpush; \
+ curvps = arg; \
+ curthread->td_vps_lpush = __func__;
+
+#define CURVPS_SET_VERBOSE(arg) \
+ CURVPS_SET_QUIET(arg) \
+ if (saved_vps) \
+ vps_log_recursion(saved_vps, saved_vps_lpush, __LINE__);
+
+#define CURVPS_SET(arg) CURVPS_SET_VERBOSE(arg)
+
+#define CURVPS_RESTORE() \
+ VPS_ASSERT(curvps != NULL && (saved_vps == NULL || \
+ saved_vps->vps_magic_n == VPS_MAGIC_N), \
+ ("CURVPS_RESTORE at %s:%d %s() curvps=%p saved_vps=%p", \
+ __FILE__, __LINE__, __func__, curvps, saved_vps)); \
+ curvps = saved_vps; \
+ curthread->td_vps_lpush = saved_vps_lpush;
+#else /* !VPS_DEBUG */
+
+#define CURVPS_SET_QUIET(arg) \
+ VPS_ASSERT((arg) != NULL && (arg)->vps_magic_n == VPS_MAGIC_N, \
+ ("CURVPS_SET at %s:%d %s() curvps=%p vps=%p", \
+ __FILE__, __LINE__, __func__, curvps, (arg))); \
+ struct vps *saved_vps = curvps; \
+ curvps = arg;
+
+#define CURVPS_SET_VERBOSE(arg) \
+ CURVPS_SET_QUIET(arg)
+
+#define CURVPS_SET(arg) CURVPS_SET_VERBOSE(arg)
+
+#define CURVPS_RESTORE() \
+ VPS_ASSERT(curvps != NULL && (saved_vps == NULL || \
+ saved_vps->vps_magic_n == VPS_MAGIC_N), \
+ ("CURVPS_RESTORE at %s:%d %s() curvps=%p saved_vps=%p", \
+ __FILE__, __LINE__, __func__, curvps, saved_vps)); \
+ curvps = saved_vps;
+#endif /* VPS_DEBUG */
+
+extern struct vps *vps0;
+#define IS_DEFAULT_VPS(arg) ((arg) == vps0)
+
+#define CRED_TO_VPS(cr) (cr)->cr_prison->pr_vps
+#define TD_TO_VPS(td) CRED_TO_VPS((td)->td_ucred)
+#define P_TO_VPS(p) CRED_TO_VPS((p)->p_ucred)
+
+/*
+ * Global linked list of all virtual process spaces, along with read locks to
+ * access it. If a caller may sleep while accessing the list, it must use
+ * the sleepable lock macros.
+ */
+LIST_HEAD(vps_list_head, vps);
+extern struct vps_list_head vps_head;
+extern struct rwlock vps_rwlock;
+extern struct sx vps_sxlock;
+
+#define VPS_LIST_RLOCK() sx_slock(&vps_sxlock)
+#define VPS_LIST_RLOCK_NOSLEEP() rw_rlock(&vps_rwlock)
+#define VPS_LIST_RUNLOCK() sx_sunlock(&vps_sxlock)
+#define VPS_LIST_RUNLOCK_NOSLEEP() rw_runlock(&vps_rwlock)
+
+/*
+ * Iteration macros to walk the global list of virtual process spaces.
+ */
+#define VPS_ITERATOR_DECL(arg) struct vps *arg
+#define VPS_FOREACH(arg) LIST_FOREACH((arg), &vps_head, vps_le)
+
+/*
+ * Virtual process space memory allocator, which allows global variables to
+ * be automatically instantiated for each process space instance.
+ */
+#define VPS_NAME(n) vps_entry_##n
+#define VPS_DECLARE(t, n) extern t VPS_NAME(n)
+#define VPS_DEFINE(t, n) t VPS_NAME(n) __section(VPS_SETNAME) __used
+#define _VPS_PTR(b, n) (__typeof(VPS_NAME(n))*) \
+ ((b) + (uintptr_t)&VPS_NAME(n))
+
+#define _VPS(b, n) (*_VPS_PTR(b, n))
+
+/*
+ * Virtualized global variable accessor macros.
+ */
+#define VPS_VPS_PTR(vps, n) _VPS_PTR((vps)->vps_data_base, n)
+#define VPS_VPS(vps, n) (*VPS_VPS_PTR((vps), n))
+
+#define VPS_PTR(n) VPS_VPS_PTR(curvps, n)
+#define VPS(n) VPS_VPS(curvps, n)
+
+/*
+ * Virtual process space allocator interfaces from the kernel linker.
+ */
+void *vps_data_alloc(int size);
+void vps_data_copy(void *start, int size);
+void vps_data_free(void *start_arg, int size);
+
+/*
+ * Virtual sysinit mechanism, allowing process space components to declare
+ * startup and shutdown methods to be run when virtual process space
+ * instances are created and destroyed.
+ */
+#include <sys/kernel.h>
+
+/*
+ * SYSINIT/SYSUNINIT variants that provide per-vps constructors and
+ * destructors.
+ */
+struct vps_sysinit {
+ enum sysinit_sub_id subsystem;
+ enum sysinit_elem_order order;
+ sysinit_cfunc_t func;
+ const void *arg;
+ TAILQ_ENTRY(vps_sysinit) link;
+};
+
+#define VPS_SYSINIT(ident, subsystem, order, func, arg) \
+ static struct vps_sysinit ident ## _vps_init = { \
+ subsystem, \
+ order, \
+ (sysinit_cfunc_t)(sysinit_nfunc_t)func, \
+ (arg) \
+ }; \
+ SYSINIT(vps_init_ ## ident, subsystem, order, \
+ vps_register_sysinit, &ident ## _vps_init); \
+ SYSUNINIT(vps_init_ ## ident, subsystem, order, \
+ vps_deregister_sysinit, &ident ## _vps_init)
+
+#define VPS_SYSUNINIT(ident, subsystem, order, func, arg) \
+ static struct vps_sysinit ident ## _vps_uninit = { \
+ subsystem, \
+ order, \
+ (sysinit_cfunc_t)(sysinit_nfunc_t)func, \
+ (arg) \
+ }; \
+ SYSINIT(vps_uninit_ ## ident, subsystem, order, \
+ vps_register_sysuninit, &ident ## _vps_uninit); \
+ SYSUNINIT(vps_uninit_ ## ident, subsystem, order, \
+ vps_deregister_sysuninit, &ident ## _vps_uninit)
+
+/*
+ * Run per-vps sysinits or sysuninits during vps creation/destruction.
+ */
+void vps_sysinit(void);
+void vps_sysuninit(void);
+
+/*
+ * Interfaces for managing per-vps constructors and destructors.
+ */
+void vps_register_sysinit(void *arg);
+void vps_register_sysuninit(void *arg);
+void vps_deregister_sysinit(void *arg);
+void vps_deregister_sysuninit(void *arg);
+
+/*
+ * EVENTHANDLER(9) extensions.
+ */
+#include <sys/eventhandler.h>
+
+void vps_global_eventhandler_iterator_func(void *, ...);
+#define VPS_GLOBAL_EVENTHANDLER_REGISTER_TAG(tag, name, func, arg, priority) \
+do { \
+ if (IS_DEFAULT_VPS(curvps)) { \
+ (tag) = vimage_eventhandler_register(NULL, #name, func, \
+ arg, priority, \
+ vps_global_eventhandler_iterator_func); \
+ } \
+} while(0)
+#define VPS_GLOBAL_EVENTHANDLER_REGISTER(name, func, arg, priority) \
+do { \
+ if (IS_DEFAULT_VPS(curvps)) { \
+ vimage_eventhandler_register(NULL, #name, func, \
+ arg, priority, \
+ vps_global_eventhandler_iterator_func); \
+ } \
+} while(0)
+
+#else /* !VIMAGE */
+
+/*
+ * Various virtual process space macros compile to no-ops without VIMAGE.
+ */
+#define curvps NULL
+
+#define VPS_ASSERT(exp, msg)
+#define CURVPS_SET(arg)
+#define CURVPS_SET_QUIET(arg)
+#define CURVPS_RESTORE()
+
+#define VPS_LIST_RLOCK()
+#define VPS_LIST_RLOCK_NOSLEEP()
+#define VPS_LIST_RUNLOCK()
+#define VPS_LIST_RUNLOCK_NOSLEEP()
+#define VPS_ITERATOR_DECL(arg)
+#define VPS_FOREACH(arg)
+
+#define IS_DEFAULT_VPS(arg) 1
+#define CRED_TO_VPS(cr) NULL
+#define TD_TO_VPS(td) NULL
+#define P_TO_VPS(p) NULL
+
+/*
+ * Versions of the vps macros that compile to normal global variables and
+ * standard sysctl definitions.
+ */
+#define VPS_NAME(n) n
+#define VPS_DECLARE(t, n) extern t n
+#define VPS_DEFINE(t, n) t n
+#define _VPS_PTR(b, n) &VPS_NAME(n)
+
+/*
+ * Virtualized global variable accessor macros.
+ */
+#define VPS_VPS_PTR(vps, n) (&(n))
+#define VPS_VPS(vps, n) (n)
+
+#define VPS_PTR(n) (&(n))
+#define VPS(n) (n)
+
+/*
+ * When VIMAGE isn't compiled into the kernel, VPS_SYSINIT/VPS_SYSUNINIT
+ * map into normal sysinits, which have the same ordering properties.
+ */
+#define VPS_SYSINIT(ident, subsystem, order, func, arg) \
+ SYSINIT(ident, subsystem, order, func, arg)
+#define VPS_SYSUNINIT(ident, subsystem, order, func, arg) \
+ SYSUNINIT(ident, subsystem, order, func, arg)
+
+/*
+ * Without VIMAGE revert to the default implementation.
+ */
+#define VPS_GLOBAL_EVENTHANDLER_REGISTER_TAG(tag, name, func, arg, priority) \
+ (tag) = eventhandler_register(NULL, #name, func, arg, priority)
+#define VPS_GLOBAL_EVENTHANDLER_REGISTER(name, func, arg, priority) \
+ eventhandler_register(NULL, #name, func, arg, priority)
+#endif /* VIMAGE */
+#endif /* _KERNEL */
+
+#endif /* !_SYS_VPS_H_ */
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -177,6 +177,7 @@
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct vmtotal total;
#if defined(COMPAT_FREEBSD11)
struct vmtotal11 total11;
@@ -197,41 +198,48 @@
/*
* Calculate process statistics.
*/
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- if ((p->p_flag & P_SYSTEM) != 0)
- continue;
- PROC_LOCK(p);
- if (p->p_state != PRS_NEW) {
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
- switch (td->td_state) {
- case TDS_INHIBITED:
- if (TD_IS_SWAPPED(td))
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if ((p->p_flag & P_SYSTEM) != 0)
+ continue;
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NEW) {
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ switch (td->td_state) {
+ case TDS_INHIBITED:
+ if (TD_IS_SWAPPED(td))
+ total.t_sw++;
+ else if (TD_IS_SLEEPING(td)) {
+ if (td->td_priority <=
+ PZERO)
+ total.t_dw++;
+ else
+ total.t_sl++;
+ }
+ break;
+ case TDS_CAN_RUN:
total.t_sw++;
- else if (TD_IS_SLEEPING(td)) {
- if (td->td_priority <= PZERO)
- total.t_dw++;
- else
- total.t_sl++;
+ break;
+ case TDS_RUNQ:
+ case TDS_RUNNING:
+ total.t_rq++;
+ break;
+ default:
+ break;
}
- break;
- case TDS_CAN_RUN:
- total.t_sw++;
- break;
- case TDS_RUNQ:
- case TDS_RUNNING:
- total.t_rq++;
- break;
- default:
- break;
+ thread_unlock(td);
}
- thread_unlock(td);
}
+ PROC_UNLOCK(p);
}
- PROC_UNLOCK(p);
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
/*
* Calculate object memory usage statistics.
*/
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -2507,18 +2507,27 @@
static int
vm_object_in_map(vm_object_t object)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
- /* sx_slock(&allproc_lock); */
- FOREACH_PROC_IN_SYSTEM(p) {
- if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
- continue;
- if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) {
- /* sx_sunlock(&allproc_lock); */
- return 1;
+ /* VPS_LIST_RLOCK(); */
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ /* sx_slock(&V_allproc_lock); */
+ FOREACH_PROC_IN_SYSTEM(p) {
+ if (!p->p_vmspace
+ /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
+ continue;
+ if (_vm_object_in_map(&p->p_vmspace->vm_map, object,
+ 0)) {
+ /* sx_sunlock(&V_allproc_lock); */
+ return 1;
+ }
}
+ /* sx_sunlock(&V_allproc_lock); */
+ CURVPS_RESTORE();
}
- /* sx_sunlock(&allproc_lock); */
+ /* VPS_LIST_RUNLOCK(); */
if (_vm_object_in_map(kernel_map, object, 0))
return 1;
return 0;
Index: sys/vm/vm_pageout.c
===================================================================
--- sys/vm/vm_pageout.c
+++ sys/vm/vm_pageout.c
@@ -1744,6 +1744,7 @@
void
vm_pageout_oom(int shortage)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p, *bigproc;
vm_offset_t size, bigsize;
struct thread *td;
@@ -1760,80 +1761,88 @@
*/
bigproc = NULL;
bigsize = 0;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- PROC_LOCK(p);
- /*
- * If this is a system, protected or killed process, skip it.
- */
- if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
- P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 ||
- p->p_pid == 1 || P_KILLED(p) ||
- (p->p_pid < 48 && swap_pager_avail != 0)) {
- PROC_UNLOCK(p);
- continue;
- }
- /*
- * If the process is in a non-running type state,
- * don't touch it. Check all the threads individually.
- */
- breakout = false;
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
- if (!TD_ON_RUNQ(td) &&
- !TD_IS_RUNNING(td) &&
- !TD_IS_SLEEPING(td) &&
- !TD_IS_SUSPENDED(td) &&
- !TD_IS_SWAPPED(td)) {
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+
+ /*
+ * If this is a system, protected or killed process,
+ * skip it.
+ */
+ if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC |
+ P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 ||
+ p->p_pid == 1 || P_KILLED(p) ||
+ (p->p_pid < 48 && swap_pager_avail != 0)) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ /*
+ * If the process is in a non-running type state,
+ * don't touch it. Check all the threads individually.
+ */
+ breakout = false;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ if (!TD_ON_RUNQ(td) &&
+ !TD_IS_RUNNING(td) &&
+ !TD_IS_SLEEPING(td) &&
+ !TD_IS_SUSPENDED(td) &&
+ !TD_IS_SWAPPED(td)) {
+ thread_unlock(td);
+ breakout = true;
+ break;
+ }
thread_unlock(td);
- breakout = true;
- break;
}
- thread_unlock(td);
- }
- if (breakout) {
- PROC_UNLOCK(p);
- continue;
- }
- /*
- * get the process size
- */
- vm = vmspace_acquire_ref(p);
- if (vm == NULL) {
+ if (breakout) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ /*
+ * get the process size
+ */
+ vm = vmspace_acquire_ref(p);
+ if (vm == NULL) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ _PHOLD_LITE(p);
PROC_UNLOCK(p);
- continue;
- }
- _PHOLD_LITE(p);
- PROC_UNLOCK(p);
- sx_sunlock(&allproc_lock);
- if (!vm_map_trylock_read(&vm->vm_map)) {
+ sx_sunlock(&V_allproc_lock);
+ if (!vm_map_trylock_read(&vm->vm_map)) {
+ vmspace_free(vm);
+ sx_slock(&V_allproc_lock);
+ PRELE(p);
+ continue;
+ }
+ size = vmspace_swap_count(vm);
+ if (shortage == VM_OOM_MEM)
+ size += vm_pageout_oom_pagecount(vm);
+ vm_map_unlock_read(&vm->vm_map);
vmspace_free(vm);
- sx_slock(&allproc_lock);
- PRELE(p);
- continue;
- }
- size = vmspace_swap_count(vm);
- if (shortage == VM_OOM_MEM)
- size += vm_pageout_oom_pagecount(vm);
- vm_map_unlock_read(&vm->vm_map);
- vmspace_free(vm);
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
- /*
- * If this process is bigger than the biggest one,
- * remember it.
- */
- if (size > bigsize) {
- if (bigproc != NULL)
- PRELE(bigproc);
- bigproc = p;
- bigsize = size;
- } else {
- PRELE(p);
+ /*
+ * If this process is bigger than the biggest one,
+ * remember it.
+ */
+ if (size > bigsize) {
+ if (bigproc != NULL)
+ PRELE(bigproc);
+ bigproc = p;
+ bigsize = size;
+ } else {
+ PRELE(p);
+ }
}
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
if (bigproc != NULL) {
if (vm_panic_on_oom != 0)
panic("out of swap space");
Index: sys/vm/vm_swapout.c
===================================================================
--- sys/vm/vm_swapout.c
+++ sys/vm/vm_swapout.c
@@ -378,6 +378,7 @@
static void
vm_daemon(void)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct rlimit rsslim;
struct proc *p;
struct thread *td;
@@ -417,114 +418,129 @@
attempts = 0;
again:
attempts++;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- vm_pindex_t limit, size;
- /*
- * if this is a system process or if we have already
- * looked at this process, skip it.
- */
- PROC_LOCK(p);
- if (p->p_state != PRS_NORMAL ||
- p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
- PROC_UNLOCK(p);
- continue;
- }
- /*
- * if the process is in a non-running type state,
- * don't touch it.
- */
- breakout = 0;
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
- if (!TD_ON_RUNQ(td) &&
- !TD_IS_RUNNING(td) &&
- !TD_IS_SLEEPING(td) &&
- !TD_IS_SUSPENDED(td)) {
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ vm_pindex_t limit, size;
+
+ /*
+ * If this is a system process or if we have
+ * already looked at this process, skip it.
+ */
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NORMAL || p->p_flag &
+ (P_INEXEC | P_SYSTEM | P_WEXIT)) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ /*
+ * If the process is in a non-running type
+ * state, don't touch it.
+ */
+ breakout = 0;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ if (!TD_ON_RUNQ(td) &&
+ !TD_IS_RUNNING(td) &&
+ !TD_IS_SLEEPING(td) &&
+ !TD_IS_SUSPENDED(td)) {
+ thread_unlock(td);
+ breakout = 1;
+ break;
+ }
thread_unlock(td);
- breakout = 1;
- break;
}
- thread_unlock(td);
- }
- if (breakout) {
- PROC_UNLOCK(p);
- continue;
- }
- /*
- * get a limit
- */
- lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
- limit = OFF_TO_IDX(
- qmin(rsslim.rlim_cur, rsslim.rlim_max));
+ if (breakout) {
+ PROC_UNLOCK(p);
+ continue;
+ }
+ /*
+ * get a limit
+ */
+ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
+ limit = OFF_TO_IDX(
+ qmin(rsslim.rlim_cur, rsslim.rlim_max));
- /*
- * let processes that are swapped out really be
- * swapped out set the limit to nothing (will force a
- * swap-out.)
- */
- if ((p->p_flag & P_INMEM) == 0)
- limit = 0; /* XXX */
- vm = vmspace_acquire_ref(p);
- _PHOLD_LITE(p);
- PROC_UNLOCK(p);
- if (vm == NULL) {
- PRELE(p);
- continue;
- }
- sx_sunlock(&allproc_lock);
+ /*
+ * let processes that are swapped out really be
+ * swapped out set the limit to nothing
+ * (will force a swap-out.)
+ */
+ if ((p->p_flag & P_INMEM) == 0)
+ limit = 0; /* XXX */
+ vm = vmspace_acquire_ref(p);
+ _PHOLD_LITE(p);
+ PROC_UNLOCK(p);
+ if (vm == NULL) {
+ PRELE(p);
+ continue;
+ }
+ sx_sunlock(&V_allproc_lock);
- size = vmspace_resident_count(vm);
- if (size >= limit) {
- vm_swapout_map_deactivate_pages(
- &vm->vm_map, limit);
size = vmspace_resident_count(vm);
- }
-#ifdef RACCT
- if (racct_enable) {
- rsize = IDX_TO_OFF(size);
- PROC_LOCK(p);
- if (p->p_state == PRS_NORMAL)
- racct_set(p, RACCT_RSS, rsize);
- ravailable = racct_get_available(p, RACCT_RSS);
- PROC_UNLOCK(p);
- if (rsize > ravailable) {
- /*
- * Don't be overly aggressive; this
- * might be an innocent process,
- * and the limit could've been exceeded
- * by some memory hog. Don't try
- * to deactivate more than 1/4th
- * of process' resident set size.
- */
- if (attempts <= 8) {
- if (ravailable < rsize -
- (rsize / 4)) {
- ravailable = rsize -
- (rsize / 4);
- }
- }
+ if (size >= limit) {
vm_swapout_map_deactivate_pages(
- &vm->vm_map,
- OFF_TO_IDX(ravailable));
- /* Update RSS usage after paging out. */
+ &vm->vm_map, limit);
size = vmspace_resident_count(vm);
+ }
+#ifdef RACCT
+ if (racct_enable) {
rsize = IDX_TO_OFF(size);
PROC_LOCK(p);
if (p->p_state == PRS_NORMAL)
racct_set(p, RACCT_RSS, rsize);
+ ravailable = racct_get_available(p,
+ RACCT_RSS);
PROC_UNLOCK(p);
- if (rsize > ravailable)
- tryagain = 1;
+ if (rsize > ravailable) {
+ /*
+ * Don't be overly aggressive;
+ * this might be an innocent
+ * process, and the limit
+ * could've been exceeded by
+ * some memory hog. Don't try to
+ * deactivate more than 1/4th of
+ * process' resident set size.
+ */
+ if (attempts <= 8) {
+ if (ravailable < rsize -
+ (rsize / 4)) {
+ ravailable =
+ rsize -
+ (rsize / 4);
+ }
+ }
+ vm_swapout_map_deactivate_pages(
+ &vm->vm_map,
+ OFF_TO_IDX(ravailable));
+ /*
+ * Update RSS usage after
+ * paging out.
+ */
+ size = vmspace_resident_count(
+ vm);
+ rsize = IDX_TO_OFF(size);
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NORMAL)
+ racct_set(p, RACCT_RSS,
+ rsize);
+ PROC_UNLOCK(p);
+ if (rsize > ravailable)
+ tryagain = 1;
+ }
}
- }
#endif
- vmspace_free(vm);
- sx_slock(&allproc_lock);
- PRELE(p);
+ vmspace_free(vm);
+ sx_slock(&V_allproc_lock);
+ PRELE(p);
+ }
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
if (tryagain != 0 && attempts <= 10) {
maybe_yield();
goto again;
@@ -656,6 +672,13 @@
int ppri, pri, slptime, swtime;
loop:
+#ifdef VIMAGE
+ if (!IS_DEFAULT_VPS(curvps) && V_vpsdying > 0) {
+ V_vproc0 = NULL;
+ return;
+ }
+#endif
+
if (vm_page_count_min()) {
vm_wait_min();
goto loop;
@@ -663,7 +686,7 @@
pp = NULL;
ppri = INT_MIN;
- sx_slock(&allproc_lock);
+ sx_slock(&V_allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
if (p->p_state == PRS_NEW ||
@@ -698,13 +721,13 @@
}
PROC_UNLOCK(p);
}
- sx_sunlock(&allproc_lock);
+ sx_sunlock(&V_allproc_lock);
/*
* Nothing to do, back to sleep.
*/
if ((p = pp) == NULL) {
- tsleep(&proc0, PVM, "swapin", MAXSLP * hz / 2);
+ tsleep(V_vproc0, PVM, "swapin", MAXSLP * hz / 2);
goto loop;
}
PROC_LOCK(p);
@@ -738,6 +761,7 @@
static void
swapout_procs(int action)
{
+ VPS_ITERATOR_DECL(vps_iter);
struct proc *p;
struct thread *td;
int slptime;
@@ -746,74 +770,81 @@
MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0);
didswap = false;
- sx_slock(&allproc_lock);
- FOREACH_PROC_IN_SYSTEM(p) {
- /*
- * Filter out not yet fully constructed processes. Do
- * not swap out held processes. Avoid processes which
- * are system, exiting, execing, traced, already swapped
- * out or are in the process of being swapped in or out.
- */
- PROC_LOCK(p);
- if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag &
- (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE |
- P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) !=
- P_INMEM) {
- PROC_UNLOCK(p);
- continue;
- }
- /*
- * Further consideration of this process for swap out
- * requires iterating over its threads. We release
- * allproc_lock here so that process creation and
- * destruction are not blocked while we iterate.
- *
- * To later reacquire allproc_lock and resume
- * iteration over the allproc list, we will first have
- * to release the lock on the process. We place a
- * hold on the process so that it remains in the
- * allproc list while it is unlocked.
- */
- _PHOLD_LITE(p);
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RLOCK();
+ VPS_FOREACH(vps_iter) {
+ CURVPS_SET(vps_iter);
+ sx_slock(&V_allproc_lock);
+ FOREACH_PROC_IN_SYSTEM(p) {
+ /*
+ * Filter out not yet fully constructed processes. Do
+ * not swap out held processes. Avoid processes which
+ * are system, exiting, execing, traced, already swapped
+ * out or are in the process of being swapped in or out.
+ */
+ PROC_LOCK(p);
+ if (p->p_state != PRS_NORMAL || p->p_lock != 0 ||
+ (p->p_flag & (P_SYSTEM | P_WEXIT | P_INEXEC |
+ P_STOPPED_SINGLE | P_TRACED | P_SWAPPINGOUT |
+ P_SWAPPINGIN | P_INMEM)) != P_INMEM) {
+ PROC_UNLOCK(p);
+ continue;
+ }
- /*
- * Do not swapout a realtime process.
- * Guarantee swap_idle_threshold1 time in memory.
- * If the system is under memory stress, or if we are
- * swapping idle processes >= swap_idle_threshold2,
- * then swap the process out.
- */
- doswap = true;
- FOREACH_THREAD_IN_PROC(p, td) {
- thread_lock(td);
- slptime = (ticks - td->td_slptick) / hz;
- if (PRI_IS_REALTIME(td->td_pri_class) ||
- slptime < swap_idle_threshold1 ||
- !thread_safetoswapout(td) ||
- ((action & VM_SWAP_NORMAL) == 0 &&
- slptime < swap_idle_threshold2))
- doswap = false;
- thread_unlock(td);
- if (!doswap)
- break;
- }
- if (doswap && swapout(p) == 0)
- didswap = true;
+ /*
+ * Further consideration of this process for swap out
+ * requires iterating over its threads. We release
+ * allproc_lock here so that process creation and
+ * destruction are not blocked while we iterate.
+ *
+ * To later reacquire allproc_lock and resume
+ * iteration over the allproc list, we will first have
+ * to release the lock on the process. We place a
+ * hold on the process so that it remains in the
+ * allproc list while it is unlocked.
+ */
+ _PHOLD_LITE(p);
+ sx_sunlock(&V_allproc_lock);
- PROC_UNLOCK(p);
- sx_slock(&allproc_lock);
- PRELE(p);
+ /*
+ * Do not swapout a realtime process.
+ * Guarantee swap_idle_threshold1 time in memory.
+ * If the system is under memory stress, or if we are
+ * swapping idle processes >= swap_idle_threshold2,
+ * then swap the process out.
+ */
+ doswap = true;
+ FOREACH_THREAD_IN_PROC(p, td) {
+ thread_lock(td);
+ slptime = (ticks - td->td_slptick) / hz;
+ if (PRI_IS_REALTIME(td->td_pri_class) ||
+ slptime < swap_idle_threshold1 ||
+ !thread_safetoswapout(td) ||
+ ((action & VM_SWAP_NORMAL) == 0 &&
+ slptime < swap_idle_threshold2))
+ doswap = false;
+ thread_unlock(td);
+ if (!doswap)
+ break;
+ }
+ if (doswap && swapout(p) == 0)
+ didswap = true;
+
+ PROC_UNLOCK(p);
+ sx_slock(&V_allproc_lock);
+ PRELE(p);
+ }
+ sx_sunlock(&V_allproc_lock);
+ CURVPS_RESTORE();
}
- sx_sunlock(&allproc_lock);
+ VPS_LIST_RUNLOCK();
/*
* If we swapped something out, and another process needed memory,
* then wakeup the sched process.
*/
if (didswap)
- wakeup(&proc0);
+ wakeup(V_vproc0);
}
static void

File Metadata

Mime Type
text/plain
Expires
Thu, Mar 12, 12:36 PM (9 h, 49 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29582528
Default Alt Text
D15865.diff (197 KB)

Event Timeline