Index: sys/compat/linuxkpi/common/src/linux_current.c =================================================================== --- sys/compat/linuxkpi/common/src/linux_current.c +++ sys/compat/linuxkpi/common/src/linux_current.c @@ -226,22 +226,29 @@ static void linux_current_uninit(void *arg __unused) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; struct task_struct *ts; struct thread *td; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - FOREACH_THREAD_IN_PROC(p, td) { - if ((ts = td->td_lkpi_task) != NULL) { - td->td_lkpi_task = NULL; - put_task_struct(ts); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + if ((ts = td->td_lkpi_task) != NULL) { + td->td_lkpi_task = NULL; + put_task_struct(ts); + } } + PROC_UNLOCK(p); } - PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); EVENTHANDLER_DEREGISTER(thread_dtor, linuxkpi_thread_dtor_tag); } Index: sys/ddb/db_command.c =================================================================== --- sys/ddb/db_command.c +++ sys/ddb/db_command.c @@ -693,6 +693,7 @@ * Find the process in question. allproc_lock is not needed * since we're in DDB. */ + /* Operate on current vps instance only. 
*/ /* sx_slock(&V_allproc_lock); */ FOREACH_PROC_IN_SYSTEM(p) if (p->p_pid == pid) @@ -875,12 +876,26 @@ } } +static void +_db_stack_trace_all_v(bool active_only) +{ + VPS_ITERATOR_DECL(vps_iter); + + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + _db_stack_trace_all(active_only); + CURVPS_RESTORE(); + } + /* VPS_LIST_RUNLOCK(); */ +} + static void db_stack_trace_active(db_expr_t dummy, bool dummy2, db_expr_t dummy3, char *dummy4) { - _db_stack_trace_all(true); + _db_stack_trace_all_v(true); } static void @@ -888,7 +903,7 @@ char *dummy4) { - _db_stack_trace_all(false); + _db_stack_trace_all_v(false); } /* Index: sys/ddb/db_ps.c =================================================================== --- sys/ddb/db_ps.c +++ sys/ddb/db_ps.c @@ -92,6 +92,7 @@ np = V_nprocs; + /* Operate on current vps instance only. */ if (!LIST_EMPTY(&V_allproc)) p = LIST_FIRST(&V_allproc); else @@ -476,6 +477,7 @@ db_findstack_cmd(db_expr_t addr, bool have_addr, db_expr_t dummy3 __unused, char *dummy4 __unused) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; struct thread *td; struct kstack_cache_entry *ks_ce; @@ -488,15 +490,22 @@ return; } - FOREACH_PROC_IN_SYSTEM(p) { - FOREACH_THREAD_IN_PROC(p, td) { - if (td->td_kstack <= saddr && saddr < td->td_kstack + - PAGE_SIZE * td->td_kstack_pages) { - db_printf("Thread %p\n", td); - return; + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + FOREACH_PROC_IN_SYSTEM(p) { + FOREACH_THREAD_IN_PROC(p, td) { + if (td->td_kstack <= saddr && + saddr < td->td_kstack + + PAGE_SIZE * td->td_kstack_pages) { + db_printf("Thread %p\n", td); + return; + } } } + CURVPS_RESTORE(); } + /* VPS_LIST_RUNLOCK(); */ for (ks_ce = kstack_cache; ks_ce != NULL; ks_ce = ks_ce->next_ks_entry) { Index: sys/ddb/db_thread.c =================================================================== --- sys/ddb/db_thread.c +++ sys/ddb/db_thread.c @@ -135,6 +135,7 @@ if (td != NULL) return (td); if (check_pid) { + /* Operate 
on current vps instance only. */ FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid == decaddr) return (FIRST_THREAD_IN_PROC(p)); @@ -161,6 +162,7 @@ decaddr = db_hex2dec(addr); if (decaddr != -1) { + /* Operate on current vps instance only. */ FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid == decaddr) return (p); Index: sys/dev/filemon/filemon.c =================================================================== --- sys/dev/filemon/filemon.c +++ sys/dev/filemon/filemon.c @@ -210,6 +210,7 @@ static void filemon_untrack_processes(struct filemon *filemon) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; sx_assert(&filemon->lock, SA_XLOCKED); @@ -223,18 +224,24 @@ * filemon_event_process_exit() will lock on filemon->lock * which we hold. */ - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - /* - * No PROC_LOCK is needed to compare here since it is - * guaranteed to not change since we have its filemon - * locked. Everything that changes this p_filemon will - * be locked on it. - */ - if (p->p_filemon == filemon) - filemon_proc_drop(p); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + /* + * No PROC_LOCK is needed to compare here since it is + * guaranteed to not change since we have its filemon + * locked. Everything that changes this p_filemon will + * be locked on it. 
+ */ + if (p->p_filemon == filemon) + filemon_proc_drop(p); + } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); /* * It's possible some references were acquired but will be Index: sys/dev/hwpmc/hwpmc_mod.c =================================================================== --- sys/dev/hwpmc/hwpmc_mod.c +++ sys/dev/hwpmc/hwpmc_mod.c @@ -5361,6 +5361,7 @@ static void pmc_process_allproc(struct pmc *pm) { + VPS_ITERATOR_DECL(vps_iter); struct pmc_owner *po; struct thread *td; struct proc *p; @@ -5368,15 +5369,22 @@ po = pm->pm_owner; if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) return; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - pmclog_process_proccreate(po, p, 0 /* sync */); - PROC_LOCK(p); - FOREACH_THREAD_IN_PROC(p, td) - pmclog_process_threadcreate(po, td, 0 /* sync */); - PROC_UNLOCK(p); + + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + pmclog_process_proccreate(po, p, 0 /* sync */); + PROC_LOCK(p); + FOREACH_THREAD_IN_PROC(p, td) + pmclog_process_threadcreate(po, td, 0 /* sync */); + PROC_UNLOCK(p); + } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); pmclog_flush(po, 0); } Index: sys/fs/pseudofs/pseudofs_vnops.c =================================================================== --- sys/fs/pseudofs/pseudofs_vnops.c +++ sys/fs/pseudofs/pseudofs_vnops.c @@ -716,6 +716,11 @@ *pn = (*pn)->pn_next; } if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) { + /* + * Operate on current vps instance only. + * We must not iterate over all vps as duplicate process space + * would not work at all and leak a lot of information. 
+ */ /* next process */ if (*p == NULL) *p = LIST_FIRST(&V_allproc); Index: sys/i386/ibcs2/ibcs2_sysvec.c =================================================================== --- sys/i386/ibcs2/ibcs2_sysvec.c +++ sys/i386/ibcs2/ibcs2_sysvec.c @@ -109,6 +109,7 @@ static int ibcs2_modevent(module_t mod, int type, void *unused) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p = NULL; int rval = 0; @@ -117,14 +118,20 @@ break; case MOD_UNLOAD: /* if this was an ELF module we'd use elf_brand_inuse()... */ - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_sysent == &ibcs2_svr3_sysvec) { - rval = EBUSY; - break; + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_sysent == &ibcs2_svr3_sysvec) { + rval = EBUSY; + break; + } } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); break; default: rval = EOPNOTSUPP; Index: sys/kern/imgact_elf.c =================================================================== --- sys/kern/imgact_elf.c +++ sys/kern/imgact_elf.c @@ -238,17 +238,24 @@ int __elfN(brand_inuse)(Elf_Brandinfo *entry) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; int rval = FALSE; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_sysent == entry->sysvec) { - rval = TRUE; - break; + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_sysent == entry->sysvec) { + rval = TRUE; + break; + } } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); return (rval); } Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -617,6 +617,7 @@ /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. 
+ * Operate on vps0 instance only. */ sx_slock(&V_allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { Index: sys/kern/kern_clock.c =================================================================== --- sys/kern/kern_clock.c +++ sys/kern/kern_clock.c @@ -184,118 +184,129 @@ static int blktime_threshold = 900; static int sleepfreq = 3; +static __inline void +_deadlres_td_on_lock(struct proc *p, struct thread *td, int blkticks) +{ + int tticks; + + /* + * The thread should be blocked on a turnstile, simply check + * if the turnstile channel is in good state. + */ + MPASS(td->td_blocked != NULL); + + tticks = ticks - td->td_blktick; + thread_unlock(td); + if (tticks > blkticks) { + + /* + * According to the provided thresholds, this thread is stuck + * for too long on a turnstile. + */ + PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + panic("%s: possible deadlock detected for %p, " + "blocked for %d ticks\n", __func__, td, tticks); + } +} + +static __inline void +_deadlres_td_sleep_q(struct proc *p, struct thread *td, int slpticks) +{ + void *wchan; + int i, slptype, tryl, tticks; + + /* + * Check if the thread is sleeping on a lock, otherwise skip the check. + * Drop the thread lock in order to avoid a LOR with the sleepqueue + * spinlock. + */ + wchan = td->td_wchan; + tticks = ticks - td->td_slptick; + thread_unlock(td); + slptype = sleepq_type(wchan); + if ((slptype == SLEEPQ_SX || slptype == SLEEPQ_LK) && + tticks > slpticks) { + + /* + * According to the provided thresholds, this thread is stuck + * for too long on a sleepqueue. + * However, being on a sleepqueue, we might still check for the + * blessed list. 
+ */ + tryl = 0; + for (i = 0; blessed[i] != NULL; i++) { + if (!strcmp(blessed[i], td->td_wmesg)) { + tryl = 1; + break; + } + } + if (tryl != 0) + return; + PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + panic("%s: possible deadlock detected for %p, " + "blocked for %d ticks\n", __func__, td, tticks); + } +} + static void deadlkres(void) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; struct thread *td; - void *wchan; - int blkticks, i, slpticks, slptype, tryl, tticks; + int blkticks, slpticks, tryl; tryl = 0; for (;;) { blkticks = blktime_threshold * hz; slpticks = slptime_threshold * hz; - /* - * Avoid to sleep on the sx_lock in order to avoid a possible - * priority inversion problem leading to starvation. - * If the lock can't be held after 100 tries, panic. - */ - if (!sx_try_slock(&V_allproc_lock)) { - if (tryl > 100) - panic("%s: possible deadlock detected on allproc_lock\n", - __func__); - tryl++; - pause("allproc", sleepfreq * hz); - continue; - } - tryl = 0; - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state == PRS_NEW) { - PROC_UNLOCK(p); - continue; + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { +again: + CURVPS_SET_QUIET(vps_iter); + /* + * Avoid sleeping on the sx_lock in order to avoid a + * possible priority inversion problem leading to + * starvation. + * If the lock can't be held after 100 tries, panic. + */ + if (!sx_try_slock(&V_allproc_lock)) { + if (tryl > 100) + panic("%s: possible deadlock detected " + "on allproc_lock\n", __func__); + tryl++; + CURVPS_RESTORE(); + pause("allproc", sleepfreq * hz); + goto again; } - FOREACH_THREAD_IN_PROC(p, td) { - - thread_lock(td); - if (TD_ON_LOCK(td)) { - - /* - * The thread should be blocked on a - * turnstile, simply check if the - * turnstile channel is in good state. 
- */ - MPASS(td->td_blocked != NULL); - - tticks = ticks - td->td_blktick; - thread_unlock(td); - if (tticks > blkticks) { - - /* - * Accordingly with provided - * thresholds, this thread is - * stuck for too long on a - * turnstile. - */ - PROC_UNLOCK(p); - sx_sunlock(&V_allproc_lock); - panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", - __func__, td, tticks); - } - } else if (TD_IS_SLEEPING(td) && - TD_ON_SLEEPQ(td)) { - - /* - * Check if the thread is sleeping on a - * lock, otherwise skip the check. - * Drop the thread lock in order to - * avoid a LOR with the sleepqueue - * spinlock. - */ - wchan = td->td_wchan; - tticks = ticks - td->td_slptick; - thread_unlock(td); - slptype = sleepq_type(wchan); - if ((slptype == SLEEPQ_SX || - slptype == SLEEPQ_LK) && - tticks > slpticks) { - - /* - * Accordingly with provided - * thresholds, this thread is - * stuck for too long on a - * sleepqueue. - * However, being on a - * sleepqueue, we might still - * check for the blessed - * list. - */ - tryl = 0; - for (i = 0; blessed[i] != NULL; - i++) { - if (!strcmp(blessed[i], - td->td_wmesg)) { - tryl = 1; - break; - } - } - if (tryl != 0) { - tryl = 0; - continue; - } - PROC_UNLOCK(p); - sx_sunlock(&V_allproc_lock); - panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", - __func__, td, tticks); - } - } else - thread_unlock(td); + tryl = 0; + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW) { + PROC_UNLOCK(p); + continue; + } + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (TD_ON_LOCK(td)) { + _deadlres_td_on_lock(p, td, + blkticks); + } else if (TD_IS_SLEEPING(td) && + TD_ON_SLEEPQ(td)) { + _deadlres_td_sleep_q(p, td, + slpticks); + } else + thread_unlock(td); + } + PROC_UNLOCK(p); } - PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); /* Sleep for sleepfreq seconds. 
*/ pause("-", sleepfreq * hz); Index: sys/kern/kern_cpuset.c =================================================================== --- sys/kern/kern_cpuset.c +++ sys/kern/kern_cpuset.c @@ -510,24 +510,32 @@ static void domainset_notify(void) { + VPS_ITERATOR_DECL(vps_iter); struct thread *td; struct proc *p; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state == PRS_NEW) { + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW) { + PROC_UNLOCK(p); + continue; + } + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + td->td_domain.dr_policy = + td->td_cpuset->cs_domain; + thread_unlock(td); + } PROC_UNLOCK(p); - continue; } - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - td->td_domain.dr_policy = td->td_cpuset->cs_domain; - thread_unlock(td); - } - PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); kernel_object->domain.dr_policy = cpuset_kernel->cs_domain; } Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -3190,6 +3190,7 @@ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) { + VPS_ITERATOR_DECL(vps_iter); struct filedesc *fdp; struct prison *pr; struct proc *p; @@ -3198,33 +3199,40 @@ if (vrefcnt(olddp) == 1) return; nrele = 0; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - fdp = fdhold(p); - PROC_UNLOCK(p); - if (fdp == NULL) - continue; - FILEDESC_XLOCK(fdp); - if (fdp->fd_cdir == olddp) { - vrefact(newdp); - fdp->fd_cdir = newdp; - nrele++; - } - if (fdp->fd_rdir == olddp) { - vrefact(newdp); - fdp->fd_rdir = newdp; - nrele++; - } - if (fdp->fd_jdir == olddp) { - vrefact(newdp); - fdp->fd_jdir = newdp; - nrele++; + + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + 
CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + fdp = fdhold(p); + PROC_UNLOCK(p); + if (fdp == NULL) + continue; + FILEDESC_XLOCK(fdp); + if (fdp->fd_cdir == olddp) { + vrefact(newdp); + fdp->fd_cdir = newdp; + nrele++; + } + if (fdp->fd_rdir == olddp) { + vrefact(newdp); + fdp->fd_rdir = newdp; + nrele++; + } + if (fdp->fd_jdir == olddp) { + vrefact(newdp); + fdp->fd_jdir = newdp; + nrele++; + } + FILEDESC_XUNLOCK(fdp); + fddrop(fdp); } - FILEDESC_XUNLOCK(fdp); - fddrop(fdp); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); if (rootvnode == olddp) { vrefact(newdp); rootvnode = newdp; @@ -3307,6 +3315,7 @@ static int sysctl_kern_file(SYSCTL_HANDLER_ARGS) { + VPS_ITERATOR_DECL(vps_iter); struct xfile xf; struct filedesc *fdp; struct file *fp; @@ -3318,6 +3327,37 @@ return (error); if (req->oldptr == NULL) { n = 0; + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW) { + PROC_UNLOCK(p); + continue; + } + fdp = fdhold(p); + PROC_UNLOCK(p); + if (fdp == NULL) + continue; + /* overestimates sparse tables. */ + if (fdp->fd_lastfile > 0) + n += fdp->fd_lastfile; + fddrop(fdp); + } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); + } + VPS_LIST_RUNLOCK(); + return (SYSCTL_OUT(req, 0, n * sizeof(xf))); + } + error = 0; + bzero(&xf, sizeof(xf)); + xf.xf_size = sizeof(xf); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); sx_slock(&V_allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); @@ -3325,61 +3365,44 @@ PROC_UNLOCK(p); continue; } + if (p_cansee(req->td, p) != 0) { + PROC_UNLOCK(p); + continue; + } + xf.xf_pid = p->p_pid; + xf.xf_uid = p->p_ucred->cr_uid; fdp = fdhold(p); PROC_UNLOCK(p); if (fdp == NULL) continue; - /* overestimates sparse tables. 
*/ - if (fdp->fd_lastfile > 0) - n += fdp->fd_lastfile; + FILEDESC_SLOCK(fdp); + for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) { + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) + continue; + xf.xf_fd = n; + xf.xf_file = fp; + xf.xf_data = fp->f_data; + xf.xf_vnode = fp->f_vnode; + xf.xf_type = fp->f_type; + xf.xf_count = fp->f_count; + xf.xf_msgcount = 0; + xf.xf_offset = foffset_get(fp); + xf.xf_flag = fp->f_flag; + error = SYSCTL_OUT(req, &xf, sizeof(xf)); + if (error) + break; + } + FILEDESC_SUNLOCK(fdp); fddrop(fdp); - } - sx_sunlock(&V_allproc_lock); - return (SYSCTL_OUT(req, 0, n * sizeof(xf))); - } - error = 0; - bzero(&xf, sizeof(xf)); - xf.xf_size = sizeof(xf); - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state == PRS_NEW) { - PROC_UNLOCK(p); - continue; - } - if (p_cansee(req->td, p) != 0) { - PROC_UNLOCK(p); - continue; - } - xf.xf_pid = p->p_pid; - xf.xf_uid = p->p_ucred->cr_uid; - fdp = fdhold(p); - PROC_UNLOCK(p); - if (fdp == NULL) - continue; - FILEDESC_SLOCK(fdp); - for (n = 0; fdp->fd_refcnt > 0 && n <= fdp->fd_lastfile; ++n) { - if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) - continue; - xf.xf_fd = n; - xf.xf_file = fp; - xf.xf_data = fp->f_data; - xf.xf_vnode = fp->f_vnode; - xf.xf_type = fp->f_type; - xf.xf_count = fp->f_count; - xf.xf_msgcount = 0; - xf.xf_offset = foffset_get(fp); - xf.xf_flag = fp->f_flag; - error = SYSCTL_OUT(req, &xf, sizeof(xf)); if (error) break; } - FILEDESC_SUNLOCK(fdp); - fddrop(fdp); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); if (error) break; } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); return (error); } @@ -3930,21 +3953,28 @@ static struct proc * file_to_first_proc(struct file *fp) { + VPS_ITERATOR_DECL(vps_iter); struct filedesc *fdp; struct proc *p; int n; - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_state == PRS_NEW) - continue; - fdp = p->p_fd; - if (fdp == NULL) - continue; - for (n = 0; n <= fdp->fd_lastfile; n++) { - if (fp == 
fdp->fd_ofiles[n].fde_file) - return (p); + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_state == PRS_NEW) + continue; + fdp = p->p_fd; + if (fdp == NULL) + continue; + for (n = 0; n <= fdp->fd_lastfile; n++) { + if (fp == fdp->fd_ofiles[n].fde_file) + return (p); + } } + CURVPS_RESTORE(); } + /* VPS_LIST_RUNLOCK(); */ return (NULL); } @@ -3982,6 +4012,7 @@ DB_SHOW_COMMAND(files, db_show_files) { + VPS_ITERATOR_DECL(vps_iter); struct filedesc *fdp; struct file *fp; struct proc *p; @@ -3989,18 +4020,24 @@ int n; header = 1; - FOREACH_PROC_IN_SYSTEM(p) { - if (p->p_state == PRS_NEW) - continue; - if ((fdp = p->p_fd) == NULL) - continue; - for (n = 0; n <= fdp->fd_lastfile; ++n) { - if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_state == PRS_NEW) + continue; + if ((fdp = p->p_fd) == NULL) continue; - db_print_file(fp, header); - header = 0; + for (n = 0; n <= fdp->fd_lastfile; ++n) { + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) + continue; + db_print_file(fp, header); + header = 0; + } } + CURVPS_RESTORE(); } + /* VPS_LIST_RUNLOCK(); */ } #endif Index: sys/kern/kern_exit.c =================================================================== --- sys/kern/kern_exit.c +++ sys/kern/kern_exit.c @@ -437,6 +437,7 @@ * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ + /* Operate on current vps instance only. */ sx_xlock(&V_allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&V_zombproc, p, p_list); Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c +++ sys/kern/kern_fork.c @@ -295,6 +295,7 @@ * reserved pids is limited by process limit times * two. */ + /* Operate on current vps instance only. 
*/ p = LIST_FIRST(&V_allproc); again: for (; p != NULL; p = LIST_NEXT(p, p_list)) { @@ -321,6 +322,7 @@ } } if (!doingzomb) { + /* Operate on current vps instance only. */ doingzomb = 1; p = LIST_FIRST(&V_zombproc); goto again; Index: sys/kern/kern_jail.c =================================================================== --- sys/kern/kern_jail.c +++ sys/kern/kern_jail.c @@ -2360,8 +2360,9 @@ #endif #endif { + /* Operate on current vps instance only. */ sx_slock(&V_allproc_lock); - LIST_FOREACH(p, &V_allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NEW && p->p_ucred && p->p_ucred->cr_prison == pr) @@ -4148,6 +4149,7 @@ /* * Force rctl to reattach rules to processes. */ + /* XXX do we need to do this over all vps instances as well? */ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); cred = crhold(p->p_ucred); Index: sys/kern/kern_ktrace.c =================================================================== --- sys/kern/kern_ktrace.c +++ sys/kern/kern_ktrace.c @@ -952,25 +952,33 @@ * Clear all uses of the tracefile. 
*/ if (ops == KTROP_CLEARFILE) { + VPS_ITERATOR_DECL(vps_iter); int vrele_count; vrele_count = 0; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_tracevp == vp) { - if (ktrcanset(td, p)) { - mtx_lock(&ktrace_mtx); - ktr_freeproc(p, &cred, NULL); - mtx_unlock(&ktrace_mtx); - vrele_count++; - crfree(cred); - } else - error = EPERM; + + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_tracevp == vp) { + if (ktrcanset(td, p)) { + mtx_lock(&ktrace_mtx); + ktr_freeproc(p, &cred, NULL); + mtx_unlock(&ktrace_mtx); + vrele_count++; + crfree(cred); + } else + error = EPERM; + } + PROC_UNLOCK(p); } - PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); if (vrele_count > 0) { while (vrele_count-- > 0) vrele(vp); @@ -1170,6 +1178,7 @@ static void ktr_writerequest(struct thread *td, struct ktr_request *req) { + VPS_ITERATOR_DECL(vps_iter); struct ktr_header *kth; struct vnode *vp; struct proc *p; @@ -1270,22 +1279,28 @@ * credentials for the operation. 
*/ cred = NULL; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_tracevp == vp) { - mtx_lock(&ktrace_mtx); - ktr_freeproc(p, &cred, NULL); - mtx_unlock(&ktrace_mtx); - vrele_count++; - } - PROC_UNLOCK(p); - if (cred != NULL) { - crfree(cred); - cred = NULL; + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_tracevp == vp) { + mtx_lock(&ktrace_mtx); + ktr_freeproc(p, &cred, NULL); + mtx_unlock(&ktrace_mtx); + vrele_count++; + } + PROC_UNLOCK(p); + if (cred != NULL) { + crfree(cred); + cred = NULL; + } } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); while (vrele_count-- > 0) vrele(vp); Index: sys/kern/kern_proc.c =================================================================== --- sys/kern/kern_proc.c +++ sys/kern/kern_proc.c @@ -383,6 +383,7 @@ struct proc *p; struct thread *td; + /* Operate on current vps instance only. */ sx_assert(&V_allproc_lock, SX_LOCKED); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); @@ -1216,6 +1217,7 @@ { struct proc *p; + /* Operate on current vps instance only. */ sx_assert(&V_allproc_lock, SX_LOCKED); LIST_FOREACH(p, &V_zombproc, p_list) { if (p->p_pid == pid) { @@ -1513,6 +1515,7 @@ } sx_slock(&V_allproc_lock); for (doingzomb=0 ; doingzomb < 2 ; doingzomb++) { + /* Operate on current vps instance only. 
*/ if (!doingzomb) p = LIST_FIRST(&V_allproc); else @@ -3093,101 +3096,133 @@ void stop_all_proc(void) { + VPS_ITERATOR_DECL(vps_iter); struct proc *cp, *p; int r, gen; bool restart, seen_stopped, seen_exiting, stopped_some; - cp = curproc; + KASSERT(IS_DEFAULT_VPS(curvps), + ("%s: called from non vps0 %p: vps %p\n", __func__, vps0, curvps)); + + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); +#ifdef VIMAGE + if (saved_vps != vps_iter) + cp = NULL; + else +#endif + cp = curproc; allproc_loop: - sx_xlock(&V_allproc_lock); - gen = allproc_gen; - seen_exiting = seen_stopped = stopped_some = restart = false; - LIST_REMOVE(cp, p_list); - LIST_INSERT_HEAD(&V_allproc, cp, p_list); - for (;;) { - p = LIST_NEXT(cp, p_list); - if (p == NULL) - break; + sx_xlock(&V_allproc_lock); + if (cp == NULL) + cp = LIST_FIRST(&V_allproc); + gen = allproc_gen; + seen_exiting = seen_stopped = stopped_some = restart = false; LIST_REMOVE(cp, p_list); - LIST_INSERT_AFTER(p, cp, p_list); - PROC_LOCK(p); - if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) { - PROC_UNLOCK(p); - continue; - } - if ((p->p_flag & P_WEXIT) != 0) { - seen_exiting = true; - PROC_UNLOCK(p); - continue; - } - if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { - /* - * Stopped processes are tolerated when there - * are no other processes which might continue - * them. P_STOPPED_SINGLE but not - * P_TOTAL_STOP process still has at least one - * thread running. 
- */ - seen_stopped = true; + LIST_INSERT_HEAD(&V_allproc, cp, p_list); + for (;;) { + p = LIST_NEXT(cp, p_list); + if (p == NULL) + break; + LIST_REMOVE(cp, p_list); + LIST_INSERT_AFTER(p, cp, p_list); + PROC_LOCK(p); + if ((p->p_flag & (P_KPROC | P_SYSTEM | P_TOTAL_STOP)) != 0) { + PROC_UNLOCK(p); + continue; + } + if ((p->p_flag & P_WEXIT) != 0) { + seen_exiting = true; + PROC_UNLOCK(p); + continue; + } + if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) { + /* + * Stopped processes are tolerated when there + * are no other processes which might continue + * them. P_STOPPED_SINGLE but not + * P_TOTAL_STOP process still has at least one + * thread running. + */ + seen_stopped = true; + PROC_UNLOCK(p); + continue; + } + _PHOLD(p); + sx_xunlock(&V_allproc_lock); + r = thread_single(p, SINGLE_ALLPROC); + if (r != 0) + restart = true; + else + stopped_some = true; + _PRELE(p); PROC_UNLOCK(p); - continue; + sx_xlock(&V_allproc_lock); } - _PHOLD(p); - sx_xunlock(&V_allproc_lock); - r = thread_single(p, SINGLE_ALLPROC); - if (r != 0) + /* Catch forked children we did not see in iteration. */ + if (gen != allproc_gen) restart = true; - else - stopped_some = true; - _PRELE(p); - PROC_UNLOCK(p); - sx_xlock(&V_allproc_lock); - } - /* Catch forked children we did not see in iteration. 
*/ - if (gen != allproc_gen) - restart = true; - sx_xunlock(&V_allproc_lock); - if (restart || stopped_some || seen_exiting || seen_stopped) { - kern_yield(PRI_USER); - goto allproc_loop; + sx_xunlock(&V_allproc_lock); + if (restart || stopped_some || seen_exiting || seen_stopped) { + kern_yield(PRI_USER); + goto allproc_loop; + } + CURVPS_RESTORE(); } + VPS_LIST_RUNLOCK(); } void resume_all_proc(void) { + VPS_ITERATOR_DECL(vps_iter); struct proc *cp, *p; - cp = curproc; - sx_xlock(&V_allproc_lock); + KASSERT(IS_DEFAULT_VPS(curvps), + ("%s: called from non vps0 %p: vps %p\n", __func__, vps0, curvps)); + + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); +#ifdef VIMAGE + if (saved_vps != vps_iter) + cp = NULL; + else +#endif + cp = curproc; + sx_xlock(&V_allproc_lock); again: - LIST_REMOVE(cp, p_list); - LIST_INSERT_HEAD(&V_allproc, cp, p_list); - for (;;) { - p = LIST_NEXT(cp, p_list); - if (p == NULL) - break; LIST_REMOVE(cp, p_list); - LIST_INSERT_AFTER(p, cp, p_list); - PROC_LOCK(p); - if ((p->p_flag & P_TOTAL_STOP) != 0) { - sx_xunlock(&V_allproc_lock); - _PHOLD(p); - thread_single_end(p, SINGLE_ALLPROC); - _PRELE(p); - PROC_UNLOCK(p); - sx_xlock(&V_allproc_lock); - } else { - PROC_UNLOCK(p); + LIST_INSERT_HEAD(&V_allproc, cp, p_list); + for (;;) { + p = LIST_NEXT(cp, p_list); + if (p == NULL) + break; + LIST_REMOVE(cp, p_list); + LIST_INSERT_AFTER(p, cp, p_list); + PROC_LOCK(p); + if ((p->p_flag & P_TOTAL_STOP) != 0) { + sx_xunlock(&V_allproc_lock); + _PHOLD(p); + thread_single_end(p, SINGLE_ALLPROC); + _PRELE(p); + PROC_UNLOCK(p); + sx_xlock(&V_allproc_lock); + } else { + PROC_UNLOCK(p); + } } + /* Did the loop above miss any stopped process? */ + FOREACH_PROC_IN_SYSTEM(p) { + /* No need for proc lock. */ + if ((p->p_flag & P_TOTAL_STOP) != 0) + goto again; + } + sx_xunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - /* Did the loop above missed any stopped process ? */ - LIST_FOREACH(p, &V_allproc, p_list) { - /* No need for proc lock. 
*/ - if ((p->p_flag & P_TOTAL_STOP) != 0) - goto again; - } - sx_xunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); } /* #define TOTAL_STOP_DEBUG 1 */ Index: sys/kern/kern_racct.c =================================================================== --- sys/kern/kern_racct.c +++ sys/kern/kern_racct.c @@ -1214,94 +1214,107 @@ } static void -racctd(void) +_racctd(void) { struct thread *td; struct proc *p; struct timeval wallclock; uint64_t pct, pct_estimate, runtime; - ASSERT_RACCT_ENABLED(); - - for (;;) { - racct_decay(); + sx_slock(&V_allproc_lock); - sx_slock(&V_allproc_lock); + LIST_FOREACH(p, &V_zombproc, p_list) { + PROC_LOCK(p); + racct_set(p, RACCT_PCTCPU, 0); + PROC_UNLOCK(p); + } - LIST_FOREACH(p, &V_zombproc, p_list) { - PROC_LOCK(p); - racct_set(p, RACCT_PCTCPU, 0); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state != PRS_NORMAL) { PROC_UNLOCK(p); + continue; } - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state != PRS_NORMAL) { - PROC_UNLOCK(p); - continue; - } - - microuptime(&wallclock); - timevalsub(&wallclock, &p->p_stats->p_start); - PROC_STATLOCK(p); - FOREACH_THREAD_IN_PROC(p, td) - ruxagg(p, td); - runtime = cputick2usec(p->p_rux.rux_runtime); - PROC_STATUNLOCK(p); + microuptime(&wallclock); + timevalsub(&wallclock, &p->p_stats->p_start); + PROC_STATLOCK(p); + FOREACH_THREAD_IN_PROC(p, td) + ruxagg(p, td); + runtime = cputick2usec(p->p_rux.rux_runtime); + PROC_STATUNLOCK(p); #ifdef notyet - KASSERT(runtime >= p->p_prev_runtime, - ("runtime < p_prev_runtime")); + KASSERT(runtime >= p->p_prev_runtime, + ("runtime < p_prev_runtime")); #else - if (runtime < p->p_prev_runtime) - runtime = p->p_prev_runtime; + if (runtime < p->p_prev_runtime) + runtime = p->p_prev_runtime; #endif - p->p_prev_runtime = runtime; - if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { - pct_estimate = (1000000 * runtime * 100) / - ((uint64_t)wallclock.tv_sec * 1000000 + - wallclock.tv_usec); - } else - pct_estimate = 0; - pct = racct_getpcpu(p, 
pct_estimate); - RACCT_LOCK(); + p->p_prev_runtime = runtime; + if (wallclock.tv_sec > 0 || wallclock.tv_usec > 0) { + pct_estimate = (1000000 * runtime * 100) / + ((uint64_t)wallclock.tv_sec * 1000000 + + wallclock.tv_usec); + } else + pct_estimate = 0; + pct = racct_getpcpu(p, pct_estimate); + RACCT_LOCK(); #ifdef RCTL - rctl_throttle_decay(p->p_racct, RACCT_READBPS); - rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS); - rctl_throttle_decay(p->p_racct, RACCT_READIOPS); - rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS); + rctl_throttle_decay(p->p_racct, RACCT_READBPS); + rctl_throttle_decay(p->p_racct, RACCT_WRITEBPS); + rctl_throttle_decay(p->p_racct, RACCT_READIOPS); + rctl_throttle_decay(p->p_racct, RACCT_WRITEIOPS); #endif - racct_set_locked(p, RACCT_PCTCPU, pct, 1); - racct_set_locked(p, RACCT_CPU, runtime, 0); - racct_set_locked(p, RACCT_WALLCLOCK, - (uint64_t)wallclock.tv_sec * 1000000 + - wallclock.tv_usec, 0); - RACCT_UNLOCK(); + racct_set_locked(p, RACCT_PCTCPU, pct, 1); + racct_set_locked(p, RACCT_CPU, runtime, 0); + racct_set_locked(p, RACCT_WALLCLOCK, + (uint64_t)wallclock.tv_sec * 1000000 + + wallclock.tv_usec, 0); + RACCT_UNLOCK(); + PROC_UNLOCK(p); + } + + /* + * To ensure that processes are throttled in a fair way, we need + * to iterate over all processes again and check the limits + * for %cpu resource only after ucred racct containers have been + * properly filled. + */ + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state != PRS_NORMAL) { PROC_UNLOCK(p); + continue; } - /* - * To ensure that processes are throttled in a fair way, we need - * to iterate over all processes again and check the limits - * for %cpu resource only after ucred racct containers have been - * properly filled. 
- */ - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state != PRS_NORMAL) { - PROC_UNLOCK(p); - continue; - } + if (racct_pcpu_available(p) <= 0) { + if (p->p_racct->r_resources[RACCT_PCTCPU] > + pcpu_threshold) + racct_proc_throttle(p, -1); + } else if (p->p_throttled == -1) { + racct_proc_wakeup(p); + } + PROC_UNLOCK(p); + } + sx_sunlock(&V_allproc_lock); +} - if (racct_pcpu_available(p) <= 0) { - if (p->p_racct->r_resources[RACCT_PCTCPU] > - pcpu_threshold) - racct_proc_throttle(p, -1); - } else if (p->p_throttled == -1) { - racct_proc_wakeup(p); - } - PROC_UNLOCK(p); +static void +racctd(void) +{ + VPS_ITERATOR_DECL(vps_iter); + + ASSERT_RACCT_ENABLED(); + + for (;;) { + racct_decay(); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + _racctd(); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); pause("-", hz); } } Index: sys/kern/kern_rctl.c =================================================================== --- sys/kern/kern_rctl.c +++ sys/kern/kern_rctl.c @@ -1266,6 +1266,7 @@ int rctl_rule_add(struct rctl_rule *rule) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; struct ucred *cred; struct uidinfo *uip; @@ -1358,36 +1359,50 @@ * it applies to. 
*/ sx_assert(&V_allproc_lock, SA_LOCKED); - FOREACH_PROC_IN_SYSTEM(p) { - cred = p->p_ucred; - switch (rule->rr_subject_type) { - case RCTL_SUBJECT_TYPE_USER: - if (cred->cr_uidinfo == rule->rr_subject.rs_uip || - cred->cr_ruidinfo == rule->rr_subject.rs_uip) - break; - continue; - case RCTL_SUBJECT_TYPE_LOGINCLASS: - if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) - break; - continue; - case RCTL_SUBJECT_TYPE_JAIL: - match = 0; - for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { - if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { - match = 1; + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); +#ifdef VIMAGE + if (saved_vps != vps_iter) + sx_slock(&V_allproc_lock); +#endif + FOREACH_PROC_IN_SYSTEM(p) { + cred = p->p_ucred; + switch (rule->rr_subject_type) { + case RCTL_SUBJECT_TYPE_USER: + if (cred->cr_uidinfo == rule->rr_subject.rs_uip || + cred->cr_ruidinfo == rule->rr_subject.rs_uip) + break; + continue; + case RCTL_SUBJECT_TYPE_LOGINCLASS: + if (cred->cr_loginclass == rule->rr_subject.rs_loginclass) break; + continue; + case RCTL_SUBJECT_TYPE_JAIL: + match = 0; + for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) { + if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) { + match = 1; + break; + } } + if (match) + break; + continue; + default: + panic("rctl_rule_add: unknown subject type %d", + rule->rr_subject_type); } - if (match) - break; - continue; - default: - panic("rctl_rule_add: unknown subject type %d", - rule->rr_subject_type); - } - rctl_racct_add_rule(p->p_racct, rule); + rctl_racct_add_rule(p->p_racct, rule); + } +#ifdef VIMAGE + if (saved_vps != vps_iter) + sx_sunlock(&V_allproc_lock); +#endif + CURVPS_RESTORE(); } + VPS_LIST_RUNLOCK(); return (0); } @@ -1426,6 +1441,7 @@ int rctl_rule_remove(struct rctl_rule *filter) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; int found = 0; @@ -1454,9 +1470,23 @@ sx_assert(&V_allproc_lock, SA_LOCKED); RACCT_LOCK(); - 
FOREACH_PROC_IN_SYSTEM(p) { - found += rctl_racct_remove_rules(p->p_racct, filter); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); +#ifdef VIMAGE + if (saved_vps != vps_iter) + sx_slock(&V_allproc_lock); +#endif + FOREACH_PROC_IN_SYSTEM(p) { + found += rctl_racct_remove_rules(p->p_racct, filter); + } +#ifdef VIMAGE + if (saved_vps != vps_iter) + sx_sunlock(&V_allproc_lock); +#endif + CURVPS_RESTORE(); } + VPS_LIST_RUNLOCK(); RACCT_UNLOCK(); if (found) @@ -1699,6 +1729,7 @@ int sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap) { + VPS_ITERATOR_DECL(vps_iter); struct sbuf *sb; struct rctl_rule *filter; struct rctl_rule_link *link; @@ -1735,24 +1766,39 @@ buf = malloc(bufsize, M_RCTL, M_WAITOK); sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN); KASSERT(sb != NULL, ("sbuf_new failed")); - - FOREACH_PROC_IN_SYSTEM(p) { - RACCT_LOCK(); - LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) { - /* - * Non-process rules will be added to the buffer later. - * Adding them here would result in duplicated output. - */ - if (link->rrl_rule->rr_subject_type != - RCTL_SUBJECT_TYPE_PROCESS) - continue; - if (!rctl_rule_matches(link->rrl_rule, filter)) - continue; - rctl_rule_to_sbuf(sb, link->rrl_rule); - sbuf_printf(sb, ","); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); +#ifdef VIMAGE + if (saved_vps != vps_iter) + sx_slock(&V_allproc_lock); +#endif + FOREACH_PROC_IN_SYSTEM(p) { + RACCT_LOCK(); + LIST_FOREACH(link, &p->p_racct->r_rule_links, + rrl_next) { + /* + * Non-process rules will be added to the + * buffer later. Adding them here would result + * in duplicated output. 
+ */ + if (link->rrl_rule->rr_subject_type != + RCTL_SUBJECT_TYPE_PROCESS) + continue; + if (!rctl_rule_matches(link->rrl_rule, filter)) + continue; + rctl_rule_to_sbuf(sb, link->rrl_rule); + sbuf_printf(sb, ","); + } + RACCT_UNLOCK(); } - RACCT_UNLOCK(); +#ifdef VIMAGE + if (saved_vps != vps_iter) + sx_sunlock(&V_allproc_lock); +#endif + CURVPS_RESTORE(); } + VPS_LIST_RUNLOCK(); loginclass_racct_foreach(rctl_get_rules_callback, rctl_rule_pre_callback, rctl_rule_post_callback, Index: sys/kern/kern_resource.c =================================================================== --- sys/kern/kern_resource.c +++ sys/kern/kern_resource.c @@ -146,6 +146,7 @@ case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; + /* Operate on current vps instance only. */ sx_slock(&V_allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); @@ -231,6 +232,7 @@ case PRIO_USER: if (uap->who == 0) uap->who = td->td_ucred->cr_uid; + /* Operate on current vps instance only. */ sx_slock(&V_allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); Index: sys/kern/kern_sig.c =================================================================== --- sys/kern/kern_sig.c +++ sys/kern/kern_sig.c @@ -1669,7 +1669,7 @@ ret = ESRCH; if (all) { /* - * broadcast + * broadcast; current vps context only. */ sx_slock(&V_allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { Index: sys/kern/sched_4bsd.c =================================================================== --- sys/kern/sched_4bsd.c +++ sys/kern/sched_4bsd.c @@ -454,11 +454,9 @@ * Recompute process priorities, every hz ticks. * MP-safe, called without the Giant mutex. 
*/ -/* ARGSUSED */ -static void -schedcpu(void) +static __inline void +_schedcpu(fixpt_t loadfac) { - fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); struct thread *td; struct proc *p; struct td_sched *ts; @@ -553,6 +551,21 @@ sx_sunlock(&V_allproc_lock); } +static void +schedcpu(void) +{ + VPS_ITERATOR_DECL(vps_iter); + fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); + + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + _schedcpu(loadfac); + CURVPS_RESTORE(); + } + VPS_LIST_RUNLOCK(); +} + /* * Main loop for a kthread that executes schedcpu once a second. */ Index: sys/kern/subr_kdb.c =================================================================== --- sys/kern/subr_kdb.c +++ sys/kern/subr_kdb.c @@ -580,14 +580,12 @@ struct proc *p; struct thread *thr; - p = LIST_FIRST(&V_allproc); - while (p != NULL) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_flag & P_INMEM) { thr = FIRST_THREAD_IN_PROC(p); if (thr != NULL) return (thr); } - p = LIST_NEXT(p, p_list); } return (NULL); } @@ -597,11 +595,9 @@ { struct proc *p; - p = LIST_FIRST(&V_allproc); - while (p != NULL) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_flag & P_INMEM && p->p_pid == pid) return (FIRST_THREAD_IN_PROC(p)); - p = LIST_NEXT(p, p_list); } return (NULL); } Index: sys/kern/subr_turnstile.c =================================================================== --- sys/kern/subr_turnstile.c +++ sys/kern/subr_turnstile.c @@ -1212,22 +1212,32 @@ DB_SHOW_ALL_COMMAND(chains, db_show_allchains) { + VPS_ITERATOR_DECL(vps_iter); struct thread *td; struct proc *p; int i; i = 1; - FOREACH_PROC_IN_SYSTEM(p) { - FOREACH_THREAD_IN_PROC(p, td) { - if ((TD_ON_LOCK(td) && LIST_EMPTY(&td->td_contested)) - || (TD_IS_INHIBITED(td) && TD_ON_SLEEPQ(td))) { - db_printf("chain %d:\n", i++); - print_lockchain(td, " "); + + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + FOREACH_PROC_IN_SYSTEM(p) { + FOREACH_THREAD_IN_PROC(p, td) { + if ((TD_ON_LOCK(td) && + 
LIST_EMPTY(&td->td_contested)) + || (TD_IS_INHIBITED(td) && + TD_ON_SLEEPQ(td))) { + db_printf("chain %d:\n", i++); + print_lockchain(td, " "); + } + if (db_pager_quit) + return; } - if (db_pager_quit) - return; } + CURVPS_RESTORE(); } + /* VPS_LIST_RUNLOCK(); */ } DB_SHOW_ALIAS(allchains, db_show_allchains) Index: sys/kern/subr_witness.c =================================================================== --- sys/kern/subr_witness.c +++ sys/kern/subr_witness.c @@ -2534,6 +2534,7 @@ DB_SHOW_ALL_COMMAND(locks, db_witness_list_all) { + VPS_ITERATOR_DECL(vps_iter); struct thread *td; struct proc *p; @@ -2542,19 +2543,25 @@ * held sleep locks, but that information is currently not exported * by WITNESS. */ - FOREACH_PROC_IN_SYSTEM(p) { - if (!witness_proc_has_locks(p)) - continue; - FOREACH_THREAD_IN_PROC(p, td) { - if (!witness_thread_has_locks(td)) + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + FOREACH_PROC_IN_SYSTEM(p) { + if (!witness_proc_has_locks(p)) continue; - db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid, - p->p_comm, td, td->td_tid); - witness_ddb_list(td); - if (db_pager_quit) - return; + FOREACH_THREAD_IN_PROC(p, td) { + if (!witness_thread_has_locks(td)) + continue; + db_printf("Process %d (%s) thread %p (%d)\n", + p->p_pid, p->p_comm, td, td->td_tid); + witness_ddb_list(td); + if (db_pager_quit) + return; + } } + CURVPS_RESTORE(); } + /* VPS_LIST_RUNLOCK(); */ } DB_SHOW_ALIAS(alllocks, db_witness_list_all) Index: sys/vm/vm_meter.c =================================================================== --- sys/vm/vm_meter.c +++ sys/vm/vm_meter.c @@ -177,6 +177,7 @@ static int vmtotal(SYSCTL_HANDLER_ARGS) { + VPS_ITERATOR_DECL(vps_iter); struct vmtotal total; #if defined(COMPAT_FREEBSD11) struct vmtotal11 total11; @@ -197,41 +198,48 @@ /* * Calculate process statistics. 
*/ - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - if ((p->p_flag & P_SYSTEM) != 0) - continue; - PROC_LOCK(p); - if (p->p_state != PRS_NEW) { - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - switch (td->td_state) { - case TDS_INHIBITED: - if (TD_IS_SWAPPED(td)) + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + if ((p->p_flag & P_SYSTEM) != 0) + continue; + PROC_LOCK(p); + if (p->p_state != PRS_NEW) { + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + switch (td->td_state) { + case TDS_INHIBITED: + if (TD_IS_SWAPPED(td)) + total.t_sw++; + else if (TD_IS_SLEEPING(td)) { + if (td->td_priority <= + PZERO) + total.t_dw++; + else + total.t_sl++; + } + break; + case TDS_CAN_RUN: total.t_sw++; - else if (TD_IS_SLEEPING(td)) { - if (td->td_priority <= PZERO) - total.t_dw++; - else - total.t_sl++; + break; + case TDS_RUNQ: + case TDS_RUNNING: + total.t_rq++; + break; + default: + break; } - break; - case TDS_CAN_RUN: - total.t_sw++; - break; - case TDS_RUNQ: - case TDS_RUNNING: - total.t_rq++; - break; - default: - break; + thread_unlock(td); } - thread_unlock(td); } + PROC_UNLOCK(p); } - PROC_UNLOCK(p); + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); /* * Calculate object memory usage statistics. 
*/ Index: sys/vm/vm_object.c =================================================================== --- sys/vm/vm_object.c +++ sys/vm/vm_object.c @@ -2507,18 +2507,27 @@ static int vm_object_in_map(vm_object_t object) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; - /* sx_slock(&V_allproc_lock); */ - FOREACH_PROC_IN_SYSTEM(p) { - if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) - continue; - if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { - /* sx_sunlock(&V_allproc_lock); */ - return 1; + /* VPS_LIST_RLOCK(); */ + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + /* sx_slock(&V_allproc_lock); */ + FOREACH_PROC_IN_SYSTEM(p) { + if (!p->p_vmspace + /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) + continue; + if (_vm_object_in_map(&p->p_vmspace->vm_map, object, + 0)) { + /* sx_sunlock(&V_allproc_lock); */ + return 1; + } } + /* sx_sunlock(&V_allproc_lock); */ + CURVPS_RESTORE(); } - /* sx_sunlock(&V_allproc_lock); */ + /* VPS_LIST_RUNLOCK(); */ if (_vm_object_in_map(kernel_map, object, 0)) return 1; return 0; Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c +++ sys/vm/vm_pageout.c @@ -1744,6 +1744,7 @@ void vm_pageout_oom(int shortage) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p, *bigproc; vm_offset_t size, bigsize; struct thread *td; @@ -1760,80 +1761,88 @@ */ bigproc = NULL; bigsize = 0; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - /* - * If this is a system, protected or killed process, skip it. - */ - if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | - P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || - p->p_pid == 1 || P_KILLED(p) || - (p->p_pid < 48 && swap_pager_avail != 0)) { - PROC_UNLOCK(p); - continue; - } - /* - * If the process is in a non-running type state, - * don't touch it. Check all the threads individually. 
- */ - breakout = false; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (!TD_ON_RUNQ(td) && - !TD_IS_RUNNING(td) && - !TD_IS_SLEEPING(td) && - !TD_IS_SUSPENDED(td) && - !TD_IS_SWAPPED(td)) { + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + + /* + * If this is a system, protected or killed process, + * skip it. + */ + if (p->p_state != PRS_NORMAL || (p->p_flag & (P_INEXEC | + P_PROTECTED | P_SYSTEM | P_WEXIT)) != 0 || + p->p_pid == 1 || P_KILLED(p) || + (p->p_pid < 48 && swap_pager_avail != 0)) { + PROC_UNLOCK(p); + continue; + } + /* + * If the process is in a non-running type state, + * don't touch it. Check all the threads individually. + */ + breakout = false; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (!TD_ON_RUNQ(td) && + !TD_IS_RUNNING(td) && + !TD_IS_SLEEPING(td) && + !TD_IS_SUSPENDED(td) && + !TD_IS_SWAPPED(td)) { + thread_unlock(td); + breakout = true; + break; + } thread_unlock(td); - breakout = true; - break; } - thread_unlock(td); - } - if (breakout) { - PROC_UNLOCK(p); - continue; - } - /* - * get the process size - */ - vm = vmspace_acquire_ref(p); - if (vm == NULL) { + if (breakout) { + PROC_UNLOCK(p); + continue; + } + /* + * get the process size + */ + vm = vmspace_acquire_ref(p); + if (vm == NULL) { + PROC_UNLOCK(p); + continue; + } + _PHOLD_LITE(p); PROC_UNLOCK(p); - continue; - } - _PHOLD_LITE(p); - PROC_UNLOCK(p); - sx_sunlock(&V_allproc_lock); - if (!vm_map_trylock_read(&vm->vm_map)) { + sx_sunlock(&V_allproc_lock); + if (!vm_map_trylock_read(&vm->vm_map)) { + vmspace_free(vm); + sx_slock(&V_allproc_lock); + PRELE(p); + continue; + } + size = vmspace_swap_count(vm); + if (shortage == VM_OOM_MEM) + size += vm_pageout_oom_pagecount(vm); + vm_map_unlock_read(&vm->vm_map); vmspace_free(vm); sx_slock(&V_allproc_lock); - PRELE(p); - continue; - } - size = vmspace_swap_count(vm); - if (shortage == VM_OOM_MEM) - size += 
vm_pageout_oom_pagecount(vm); - vm_map_unlock_read(&vm->vm_map); - vmspace_free(vm); - sx_slock(&V_allproc_lock); - /* - * If this process is bigger than the biggest one, - * remember it. - */ - if (size > bigsize) { - if (bigproc != NULL) - PRELE(bigproc); - bigproc = p; - bigsize = size; - } else { - PRELE(p); + /* + * If this process is bigger than the biggest one, + * remember it. + */ + if (size > bigsize) { + if (bigproc != NULL) + PRELE(bigproc); + bigproc = p; + bigsize = size; + } else { + PRELE(p); + } } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); if (bigproc != NULL) { if (vm_panic_on_oom != 0) panic("out of swap space"); Index: sys/vm/vm_swapout.c =================================================================== --- sys/vm/vm_swapout.c +++ sys/vm/vm_swapout.c @@ -378,6 +378,7 @@ static void vm_daemon(void) { + VPS_ITERATOR_DECL(vps_iter); struct rlimit rsslim; struct proc *p; struct thread *td; @@ -417,114 +418,129 @@ attempts = 0; again: attempts++; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - vm_pindex_t limit, size; - /* - * if this is a system process or if we have already - * looked at this process, skip it. - */ - PROC_LOCK(p); - if (p->p_state != PRS_NORMAL || - p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { - PROC_UNLOCK(p); - continue; - } - /* - * if the process is in a non-running type state, - * don't touch it. - */ - breakout = 0; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (!TD_ON_RUNQ(td) && - !TD_IS_RUNNING(td) && - !TD_IS_SLEEPING(td) && - !TD_IS_SUSPENDED(td)) { + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + vm_pindex_t limit, size; + + /* + * If this is a system process or if we have + * already looked at this process, skip it. 
+ */ + PROC_LOCK(p); + if (p->p_state != PRS_NORMAL || p->p_flag & + (P_INEXEC | P_SYSTEM | P_WEXIT)) { + PROC_UNLOCK(p); + continue; + } + /* + * If the process is in a non-running type + * state, don't touch it. + */ + breakout = 0; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (!TD_ON_RUNQ(td) && + !TD_IS_RUNNING(td) && + !TD_IS_SLEEPING(td) && + !TD_IS_SUSPENDED(td)) { + thread_unlock(td); + breakout = 1; + break; + } thread_unlock(td); - breakout = 1; - break; } - thread_unlock(td); - } - if (breakout) { - PROC_UNLOCK(p); - continue; - } - /* - * get a limit - */ - lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); - limit = OFF_TO_IDX( - qmin(rsslim.rlim_cur, rsslim.rlim_max)); + if (breakout) { + PROC_UNLOCK(p); + continue; + } + /* + * get a limit + */ + lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); + limit = OFF_TO_IDX( + qmin(rsslim.rlim_cur, rsslim.rlim_max)); - /* - * let processes that are swapped out really be - * swapped out set the limit to nothing (will force a - * swap-out.) - */ - if ((p->p_flag & P_INMEM) == 0) - limit = 0; /* XXX */ - vm = vmspace_acquire_ref(p); - _PHOLD_LITE(p); - PROC_UNLOCK(p); - if (vm == NULL) { - PRELE(p); - continue; - } - sx_sunlock(&V_allproc_lock); + /* + * let processes that are swapped out really be + * swapped out set the limit to nothing + * (will force a swap-out.) 
+ */ + if ((p->p_flag & P_INMEM) == 0) + limit = 0; /* XXX */ + vm = vmspace_acquire_ref(p); + _PHOLD_LITE(p); + PROC_UNLOCK(p); + if (vm == NULL) { + PRELE(p); + continue; + } + sx_sunlock(&V_allproc_lock); - size = vmspace_resident_count(vm); - if (size >= limit) { - vm_swapout_map_deactivate_pages( - &vm->vm_map, limit); size = vmspace_resident_count(vm); - } -#ifdef RACCT - if (racct_enable) { - rsize = IDX_TO_OFF(size); - PROC_LOCK(p); - if (p->p_state == PRS_NORMAL) - racct_set(p, RACCT_RSS, rsize); - ravailable = racct_get_available(p, RACCT_RSS); - PROC_UNLOCK(p); - if (rsize > ravailable) { - /* - * Don't be overly aggressive; this - * might be an innocent process, - * and the limit could've been exceeded - * by some memory hog. Don't try - * to deactivate more than 1/4th - * of process' resident set size. - */ - if (attempts <= 8) { - if (ravailable < rsize - - (rsize / 4)) { - ravailable = rsize - - (rsize / 4); - } - } + if (size >= limit) { vm_swapout_map_deactivate_pages( - &vm->vm_map, - OFF_TO_IDX(ravailable)); - /* Update RSS usage after paging out. */ + &vm->vm_map, limit); size = vmspace_resident_count(vm); + } +#ifdef RACCT + if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); if (p->p_state == PRS_NORMAL) racct_set(p, RACCT_RSS, rsize); + ravailable = racct_get_available(p, + RACCT_RSS); PROC_UNLOCK(p); - if (rsize > ravailable) - tryagain = 1; + if (rsize > ravailable) { + /* + * Don't be overly aggressive; + * this might be an innocent + * process, and the limit + * could've been exceeded by + * some memory hog. Don't try to + * deactivate more than 1/4th of + * process' resident set size. + */ + if (attempts <= 8) { + if (ravailable < rsize - + (rsize / 4)) { + ravailable = + rsize - + (rsize / 4); + } + } + vm_swapout_map_deactivate_pages( + &vm->vm_map, + OFF_TO_IDX(ravailable)); + /* + * Update RSS usage after + * paging out. 
+ */ + size = vmspace_resident_count( + vm); + rsize = IDX_TO_OFF(size); + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL) + racct_set(p, RACCT_RSS, + rsize); + PROC_UNLOCK(p); + if (rsize > ravailable) + tryagain = 1; + } } - } #endif - vmspace_free(vm); - sx_slock(&V_allproc_lock); - PRELE(p); + vmspace_free(vm); + sx_slock(&V_allproc_lock); + PRELE(p); + } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); if (tryagain != 0 && attempts <= 10) { maybe_yield(); goto again; @@ -738,6 +754,7 @@ static void swapout_procs(int action) { + VPS_ITERATOR_DECL(vps_iter); struct proc *p; struct thread *td; int slptime; @@ -746,67 +763,74 @@ MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0); didswap = false; - sx_slock(&V_allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - /* - * Filter out not yet fully constructed processes. Do - * not swap out held processes. Avoid processes which - * are system, exiting, execing, traced, already swapped - * out or are in the process of being swapped in or out. - */ - PROC_LOCK(p); - if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag & - (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE | - P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) != - P_INMEM) { - PROC_UNLOCK(p); - continue; - } - /* - * Further consideration of this process for swap out - * requires iterating over its threads. We release - * allproc_lock here so that process creation and - * destruction are not blocked while we iterate. - * - * To later reacquire allproc_lock and resume - * iteration over the allproc list, we will first have - * to release the lock on the process. We place a - * hold on the process so that it remains in the - * allproc list while it is unlocked. - */ - _PHOLD_LITE(p); - sx_sunlock(&V_allproc_lock); + VPS_LIST_RLOCK(); + VPS_FOREACH(vps_iter) { + CURVPS_SET(vps_iter); + sx_slock(&V_allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + /* + * Filter out not yet fully constructed processes. 
Do + * not swap out held processes. Avoid processes which + * are system, exiting, execing, traced, already swapped + * out or are in the process of being swapped in or out. + */ + PROC_LOCK(p); + if (p->p_state != PRS_NORMAL || p->p_lock != 0 || + (p->p_flag & (P_SYSTEM | P_WEXIT | P_INEXEC | + P_STOPPED_SINGLE | P_TRACED | P_SWAPPINGOUT | + P_SWAPPINGIN | P_INMEM)) != P_INMEM) { + PROC_UNLOCK(p); + continue; + } - /* - * Do not swapout a realtime process. - * Guarantee swap_idle_threshold1 time in memory. - * If the system is under memory stress, or if we are - * swapping idle processes >= swap_idle_threshold2, - * then swap the process out. - */ - doswap = true; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - slptime = (ticks - td->td_slptick) / hz; - if (PRI_IS_REALTIME(td->td_pri_class) || - slptime < swap_idle_threshold1 || - !thread_safetoswapout(td) || - ((action & VM_SWAP_NORMAL) == 0 && - slptime < swap_idle_threshold2)) - doswap = false; - thread_unlock(td); - if (!doswap) - break; - } - if (doswap && swapout(p) == 0) - didswap = true; + /* + * Further consideration of this process for swap out + * requires iterating over its threads. We release + * allproc_lock here so that process creation and + * destruction are not blocked while we iterate. + * + * To later reacquire allproc_lock and resume + * iteration over the allproc list, we will first have + * to release the lock on the process. We place a + * hold on the process so that it remains in the + * allproc list while it is unlocked. + */ + _PHOLD_LITE(p); + sx_sunlock(&V_allproc_lock); - PROC_UNLOCK(p); - sx_slock(&V_allproc_lock); - PRELE(p); + /* + * Do not swapout a realtime process. + * Guarantee swap_idle_threshold1 time in memory. + * If the system is under memory stress, or if we are + * swapping idle processes >= swap_idle_threshold2, + * then swap the process out. 
+ */ + doswap = true; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + slptime = (ticks - td->td_slptick) / hz; + if (PRI_IS_REALTIME(td->td_pri_class) || + slptime < swap_idle_threshold1 || + !thread_safetoswapout(td) || + ((action & VM_SWAP_NORMAL) == 0 && + slptime < swap_idle_threshold2)) + doswap = false; + thread_unlock(td); + if (!doswap) + break; + } + if (doswap && swapout(p) == 0) + didswap = true; + + PROC_UNLOCK(p); + sx_slock(&V_allproc_lock); + PRELE(p); + } + sx_sunlock(&V_allproc_lock); + CURVPS_RESTORE(); } - sx_sunlock(&V_allproc_lock); + VPS_LIST_RUNLOCK(); /* * If we swapped something out, and another process needed memory,