Index: head/sys/ddb/db_ps.c =================================================================== --- head/sys/ddb/db_ps.c (revision 350662) +++ head/sys/ddb/db_ps.c (revision 350663) @@ -1,536 +1,525 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #define PRINT_NONE 0 #define PRINT_ARGS 1 static void dumpthread(volatile struct proc *p, volatile struct thread *td, int all); static int ps_mode; /* * At least one non-optional show-command must be implemented using * DB_SHOW_ALL_COMMAND() so that db_show_all_cmd_set gets created. * Here is one. */ DB_SHOW_ALL_COMMAND(procs, db_procs_cmd) { db_ps(addr, have_addr, count, modif); } static void dump_args(volatile struct proc *p) { char *args; int i, len; if (p->p_args == NULL) return; args = p->p_args->ar_args; len = (int)p->p_args->ar_length; for (i = 0; i < len; i++) { if (args[i] == '\0') db_printf(" "); else db_printf("%c", args[i]); } } /* * Layout: * - column counts * - header * - single-threaded process * - multi-threaded process * - thread in a MT process * * 1 2 3 4 5 6 7 * 1234567890123456789012345678901234567890123456789012345678901234567890 * pid ppid pgrp uid state wmesg wchan cmd * * (threaded) * * * For machines with 64-bit pointers, we expand the wchan field 8 more * characters. */ void db_ps(db_expr_t addr, bool hasaddr, db_expr_t count, char *modif) { volatile struct proc *p, *pp; volatile struct thread *td; struct ucred *cred; struct pgrp *pgrp; char state[9]; int np, rflag, sflag, dflag, lflag, wflag; ps_mode = modif[0] == 'a' ? PRINT_ARGS : PRINT_NONE; np = nprocs; if (!LIST_EMPTY(&allproc)) p = LIST_FIRST(&allproc); else p = &proc0; #ifdef __LP64__ db_printf(" pid ppid pgrp uid state wmesg wchan cmd\n"); #else db_printf(" pid ppid pgrp uid state wmesg wchan cmd\n"); #endif while (--np >= 0 && !db_pager_quit) { if (p == NULL) { db_printf("oops, ran out of processes early!\n"); break; } pp = p->p_pptr; if (pp == NULL) pp = p; cred = p->p_ucred; pgrp = p->p_pgrp; db_printf("%5d %5d %5d %5d ", p->p_pid, pp->p_pid, pgrp != NULL ? pgrp->pg_id : 0, cred != NULL ? cred->cr_ruid : 0); /* Determine our primary process state. */ switch (p->p_state) { case PRS_NORMAL: if (P_SHOULDSTOP(p)) state[0] = 'T'; else { /* * One of D, L, R, S, W. For a * multithreaded process we will use * the state of the thread with the * highest precedence. The * precendence order from high to low * is R, L, D, S, W. If no thread is * in a sane state we use '?' for our * primary state. */ rflag = sflag = dflag = lflag = wflag = 0; FOREACH_THREAD_IN_PROC(p, td) { if (td->td_state == TDS_RUNNING || td->td_state == TDS_RUNQ || td->td_state == TDS_CAN_RUN) rflag++; if (TD_ON_LOCK(td)) lflag++; if (TD_IS_SLEEPING(td)) { if (!(td->td_flags & TDF_SINTR)) dflag++; else sflag++; } if (TD_AWAITING_INTR(td)) wflag++; } if (rflag) state[0] = 'R'; else if (lflag) state[0] = 'L'; else if (dflag) state[0] = 'D'; else if (sflag) state[0] = 'S'; else if (wflag) state[0] = 'W'; else state[0] = '?'; } break; case PRS_NEW: state[0] = 'N'; break; case PRS_ZOMBIE: state[0] = 'Z'; break; default: state[0] = 'U'; break; } state[1] = '\0'; /* Additional process state flags. */ if (!(p->p_flag & P_INMEM)) strlcat(state, "W", sizeof(state)); if (p->p_flag & P_TRACED) strlcat(state, "X", sizeof(state)); if (p->p_flag & P_WEXIT && p->p_state != PRS_ZOMBIE) strlcat(state, "E", sizeof(state)); if (p->p_flag & P_PPWAIT) strlcat(state, "V", sizeof(state)); if (p->p_flag & P_SYSTEM || p->p_lock > 0) strlcat(state, "L", sizeof(state)); if (p->p_pgrp != NULL && p->p_session != NULL && SESS_LEADER(p)) strlcat(state, "s", sizeof(state)); /* Cheated here and didn't compare pgid's. */ if (p->p_flag & P_CONTROLT) strlcat(state, "+", sizeof(state)); if (cred != NULL && jailed(cred)) strlcat(state, "J", sizeof(state)); db_printf(" %-6.6s ", state); if (p->p_flag & P_HADTHREADS) { #ifdef __LP64__ db_printf(" (threaded) "); #else db_printf(" (threaded) "); #endif if (p->p_flag & P_SYSTEM) db_printf("["); db_printf("%s", p->p_comm); if (p->p_flag & P_SYSTEM) db_printf("]"); if (ps_mode == PRINT_ARGS) { db_printf(" "); dump_args(p); } db_printf("\n"); } FOREACH_THREAD_IN_PROC(p, td) { dumpthread(p, td, p->p_flag & P_HADTHREADS); if (db_pager_quit) break; } p = LIST_NEXT(p, p_list); if (p == NULL && np > 0) p = LIST_FIRST(&zombproc); } } static void dumpthread(volatile struct proc *p, volatile struct thread *td, int all) { char state[9], wprefix; const char *wmesg; void *wchan; if (all) { db_printf("%6d ", td->td_tid); switch (td->td_state) { case TDS_RUNNING: snprintf(state, sizeof(state), "Run"); break; case TDS_RUNQ: snprintf(state, sizeof(state), "RunQ"); break; case TDS_CAN_RUN: snprintf(state, sizeof(state), "CanRun"); break; case TDS_INACTIVE: snprintf(state, sizeof(state), "Inactv"); break; case TDS_INHIBITED: state[0] = '\0'; if (TD_ON_LOCK(td)) strlcat(state, "L", sizeof(state)); if (TD_IS_SLEEPING(td)) { if (td->td_flags & TDF_SINTR) strlcat(state, "S", sizeof(state)); else strlcat(state, "D", sizeof(state)); } if (TD_IS_SWAPPED(td)) strlcat(state, "W", sizeof(state)); if (TD_AWAITING_INTR(td)) strlcat(state, "I", sizeof(state)); if (TD_IS_SUSPENDED(td)) strlcat(state, "s", sizeof(state)); if (state[0] != '\0') break; default: snprintf(state, sizeof(state), "???"); } db_printf(" %-6.6s ", state); } wprefix = ' '; if (TD_ON_LOCK(td)) { wprefix = '*'; wmesg = td->td_lockname; wchan = td->td_blocked; } else if (TD_ON_SLEEPQ(td)) { wmesg = td->td_wmesg; wchan = td->td_wchan; } else if (TD_IS_RUNNING(td)) { snprintf(state, sizeof(state), "CPU %d", td->td_oncpu); wmesg = state; wchan = NULL; } else { wmesg = ""; wchan = NULL; } db_printf("%c%-7.7s ", wprefix, wmesg); if (wchan == NULL) #ifdef __LP64__ db_printf("%18s ", ""); #else db_printf("%10s ", ""); #endif else db_printf("%p ", wchan); if (p->p_flag & P_SYSTEM) db_printf("["); if (td->td_name[0] != '\0') db_printf("%s", td->td_name); else db_printf("%s", td->td_proc->p_comm); if (p->p_flag & P_SYSTEM) db_printf("]"); if (ps_mode == PRINT_ARGS && all == 0) { db_printf(" "); dump_args(p); } db_printf("\n"); } DB_SHOW_COMMAND(thread, db_show_thread) { struct thread *td; struct lock_object *lock; u_int delta; bool comma; /* Determine which thread to examine. */ if (have_addr) td = db_lookup_thread(addr, false); else td = kdb_thread; lock = (struct lock_object *)td->td_lock; db_printf("Thread %d at %p:\n", td->td_tid, td); db_printf(" proc (pid %d): %p\n", td->td_proc->p_pid, td->td_proc); if (td->td_name[0] != '\0') db_printf(" name: %s\n", td->td_name); db_printf(" pcb: %p\n", td->td_pcb); db_printf(" stack: %p-%p\n", (void *)td->td_kstack, (void *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE - 1)); db_printf(" flags: %#x ", td->td_flags); db_printf(" pflags: %#x\n", td->td_pflags); db_printf(" state: "); switch (td->td_state) { case TDS_INACTIVE: db_printf("INACTIVE\n"); break; case TDS_CAN_RUN: db_printf("CAN RUN\n"); break; case TDS_RUNQ: db_printf("RUNQ\n"); break; case TDS_RUNNING: db_printf("RUNNING (CPU %d)\n", td->td_oncpu); break; case TDS_INHIBITED: db_printf("INHIBITED: {"); comma = false; if (TD_IS_SLEEPING(td)) { db_printf("SLEEPING"); comma = true; } if (TD_IS_SUSPENDED(td)) { if (comma) db_printf(", "); db_printf("SUSPENDED"); comma = true; } if (TD_IS_SWAPPED(td)) { if (comma) db_printf(", "); db_printf("SWAPPED"); comma = true; } if (TD_ON_LOCK(td)) { if (comma) db_printf(", "); db_printf("LOCK"); comma = true; } if (TD_AWAITING_INTR(td)) { if (comma) db_printf(", "); db_printf("IWAIT"); } db_printf("}\n"); break; default: db_printf("??? (%#x)\n", td->td_state); break; } if (TD_ON_LOCK(td)) db_printf(" lock: %s turnstile: %p\n", td->td_lockname, td->td_blocked); if (TD_ON_SLEEPQ(td)) db_printf( " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n", td->td_wmesg, td->td_wchan, (long)sbttobt(td->td_sleeptimo).sec, (uintmax_t)sbttobt(td->td_sleeptimo).frac, (long)sbttobt(sbinuptime()).sec, (uintmax_t)sbttobt(sbinuptime()).frac); db_printf(" priority: %d\n", td->td_priority); db_printf(" container lock: %s (%p)\n", lock->lo_name, lock); if (td->td_swvoltick != 0) { delta = ticks - td->td_swvoltick; db_printf(" last voluntary switch: %u.%03u s ago\n", delta / hz, (delta % hz) * 1000 / hz); } if (td->td_swinvoltick != 0) { delta = ticks - td->td_swinvoltick; db_printf(" last involuntary switch: %u.%03u s ago\n", delta / hz, (delta % hz) * 1000 / hz); } } DB_SHOW_COMMAND(proc, db_show_proc) { struct thread *td; struct proc *p; int i; /* Determine which process to examine. */ if (have_addr) p = db_lookup_proc(addr); else p = kdb_thread->td_proc; db_printf("Process %d (%s) at %p:\n", p->p_pid, p->p_comm, p); db_printf(" state: "); switch (p->p_state) { case PRS_NEW: db_printf("NEW\n"); break; case PRS_NORMAL: db_printf("NORMAL\n"); break; case PRS_ZOMBIE: db_printf("ZOMBIE\n"); break; default: db_printf("??? (%#x)\n", p->p_state); } if (p->p_ucred != NULL) { db_printf(" uid: %d gids: ", p->p_ucred->cr_uid); for (i = 0; i < p->p_ucred->cr_ngroups; i++) { db_printf("%d", p->p_ucred->cr_groups[i]); if (i < (p->p_ucred->cr_ngroups - 1)) db_printf(", "); } db_printf("\n"); } if (p->p_pptr != NULL) db_printf(" parent: pid %d at %p\n", p->p_pptr->p_pid, p->p_pptr); if (p->p_leader != NULL && p->p_leader != p) db_printf(" leader: pid %d at %p\n", p->p_leader->p_pid, p->p_leader); if (p->p_sysent != NULL) db_printf(" ABI: %s\n", p->p_sysent->sv_name); if (p->p_args != NULL) { db_printf(" arguments: "); dump_args(p); db_printf("\n"); } db_printf(" reaper: %p reapsubtree: %d\n", p->p_reaper, p->p_reapsubtree); db_printf(" sigparent: %d\n", p->p_sigparent); db_printf(" vmspace: %p\n", p->p_vmspace); db_printf(" (map %p)\n", (p->p_vmspace != NULL) ? &p->p_vmspace->vm_map : 0); db_printf(" (map.pmap %p)\n", (p->p_vmspace != NULL) ? &p->p_vmspace->vm_map.pmap : 0); db_printf(" (pmap %p)\n", (p->p_vmspace != NULL) ? &p->p_vmspace->vm_pmap : 0); db_printf(" threads: %d\n", p->p_numthreads); FOREACH_THREAD_IN_PROC(p, td) { dumpthread(p, td, 1); if (db_pager_quit) break; } } void db_findstack_cmd(db_expr_t addr, bool have_addr, db_expr_t dummy3 __unused, char *dummy4 __unused) { struct proc *p; struct thread *td; - struct kstack_cache_entry *ks_ce; vm_offset_t saddr; if (have_addr) saddr = addr; else { db_printf("Usage: findstack
\n"); return; } FOREACH_PROC_IN_SYSTEM(p) { FOREACH_THREAD_IN_PROC(p, td) { if (td->td_kstack <= saddr && saddr < td->td_kstack + PAGE_SIZE * td->td_kstack_pages) { db_printf("Thread %p\n", td); return; } - } - } - - for (ks_ce = kstack_cache; ks_ce != NULL; - ks_ce = ks_ce->next_ks_entry) { - if ((vm_offset_t)ks_ce <= saddr && saddr < (vm_offset_t)ks_ce + - PAGE_SIZE * kstack_pages) { - db_printf("Cached stack %p\n", ks_ce); - return; } } } Index: head/sys/sys/_kstack_cache.h =================================================================== --- head/sys/sys/_kstack_cache.h (revision 350662) +++ head/sys/sys/_kstack_cache.h (nonexistent) @@ -1,49 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright (c) 2009 Konstantin Belousov - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _SYS__KSTACK_CACHE_H -#define _SYS__KSTACK_CACHE_H - -struct kstack_cache_entry { - struct vm_object *ksobj; - struct kstack_cache_entry *next_ks_entry; -}; - -extern struct kstack_cache_entry *kstack_cache; - -#ifndef KSTACK_MAX_PAGES -#define KSTACK_MAX_PAGES 32 -#endif - -#endif - - Property changes on: head/sys/sys/_kstack_cache.h ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/vm/vm_glue.c =================================================================== --- head/sys/vm/vm_glue.c (revision 350662) +++ head/sys/vm/vm_glue.c (revision 350663) @@ -1,596 +1,615 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include "opt_vm.h" #include "opt_kstack_pages.h" #include "opt_kstack_max_pages.h" #include "opt_kstack_usage_prof.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * MPSAFE * * WARNING! This code calls vm_map_check_protection() which only checks * the associated vm_map_entry range. It does not determine whether the * contents of the memory is actually readable or writable. In most cases * just checking the vm_map_entry is sufficient within the kernel's address * space. */ int kernacc(void *addr, int len, int rw) { boolean_t rv; vm_offset_t saddr, eaddr; vm_prot_t prot; KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to kernacc (%x)\n", rw)); if ((vm_offset_t)addr + len > vm_map_max(kernel_map) || (vm_offset_t)addr + len < (vm_offset_t)addr) return (FALSE); prot = rw; saddr = trunc_page((vm_offset_t)addr); eaddr = round_page((vm_offset_t)addr + len); vm_map_lock_read(kernel_map); rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot); vm_map_unlock_read(kernel_map); return (rv == TRUE); } /* * MPSAFE * * WARNING! This code calls vm_map_check_protection() which only checks * the associated vm_map_entry range. It does not determine whether the * contents of the memory is actually readable or writable. vmapbuf(), * vm_fault_quick(), or copyin()/copout()/su*()/fu*() functions should be * used in conjunction with this call. */ int useracc(void *addr, int len, int rw) { boolean_t rv; vm_prot_t prot; vm_map_t map; KASSERT((rw & ~VM_PROT_ALL) == 0, ("illegal ``rw'' argument to useracc (%x)\n", rw)); prot = rw; map = &curproc->p_vmspace->vm_map; if ((vm_offset_t)addr + len > vm_map_max(map) || (vm_offset_t)addr + len < (vm_offset_t)addr) { return (FALSE); } vm_map_lock_read(map); rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot); vm_map_unlock_read(map); return (rv == TRUE); } int vslock(void *addr, size_t len) { vm_offset_t end, last, start; vm_size_t npages; int error; last = (vm_offset_t)addr + len; start = trunc_page((vm_offset_t)addr); end = round_page(last); if (last < (vm_offset_t)addr || end < (vm_offset_t)addr) return (EINVAL); npages = atop(end - start); if (npages > vm_page_max_user_wired) return (ENOMEM); error = vm_map_wire(&curproc->p_vmspace->vm_map, start, end, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (error == KERN_SUCCESS) { curthread->td_vslock_sz += len; return (0); } /* * Return EFAULT on error to match copy{in,out}() behaviour * rather than returning ENOMEM like mlock() would. */ return (EFAULT); } void vsunlock(void *addr, size_t len) { /* Rely on the parameter sanity checks performed by vslock(). */ MPASS(curthread->td_vslock_sz >= len); curthread->td_vslock_sz -= len; (void)vm_map_unwire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); } /* * Pin the page contained within the given object at the given offset. If the * page is not resident, allocate and load it using the given object's pager. * Return the pinned page if successful; otherwise, return NULL. */ static vm_page_t vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) { vm_page_t m; vm_pindex_t pindex; int rv; VM_OBJECT_WLOCK(object); pindex = OFF_TO_IDX(offset); m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); if (m->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(m); rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { vm_page_lock(m); vm_page_unwire(m, PQ_NONE); vm_page_free(m); vm_page_unlock(m); m = NULL; goto out; } vm_page_xunbusy(m); } out: VM_OBJECT_WUNLOCK(object); return (m); } /* * Return a CPU private mapping to the page at the given offset within the * given object. The page is pinned before it is mapped. */ struct sf_buf * vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset) { vm_page_t m; m = vm_imgact_hold_page(object, offset); if (m == NULL) return (NULL); sched_pin(); return (sf_buf_alloc(m, SFB_CPUPRIVATE)); } /* * Destroy the given CPU private mapping and unpin the page that it mapped. */ void vm_imgact_unmap_page(struct sf_buf *sf) { vm_page_t m; m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); vm_page_unlock(m); } void vm_sync_icache(vm_map_t map, vm_offset_t va, vm_offset_t sz) { pmap_sync_icache(map->pmap, va, sz); } -struct kstack_cache_entry *kstack_cache; +static uma_zone_t kstack_cache; static int kstack_cache_size = 128; -static int kstacks, kstack_domain_iter; -static struct mtx kstack_cache_mtx; -MTX_SYSINIT(kstack_cache, &kstack_cache_mtx, "kstkch", MTX_DEF); +static int kstack_domain_iter; -SYSCTL_INT(_vm, OID_AUTO, kstack_cache_size, CTLFLAG_RW, &kstack_cache_size, 0, - ""); -SYSCTL_INT(_vm, OID_AUTO, kstacks, CTLFLAG_RD, &kstacks, 0, - ""); +static int +sysctl_kstack_cache_size(SYSCTL_HANDLER_ARGS) +{ + int error, newsize; + newsize = kstack_cache_size; + error = sysctl_handle_int(oidp, &newsize, 0, req); + if (error == 0 && req->newptr && newsize != kstack_cache_size) + kstack_cache_size = + uma_zone_set_maxcache(kstack_cache, newsize); + return (error); +} +SYSCTL_PROC(_vm, OID_AUTO, kstack_cache_size, CTLTYPE_INT|CTLFLAG_RW, + &kstack_cache_size, 0, sysctl_kstack_cache_size, "IU", + "Maximum number of cached kernel stacks"); + /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and * create performance for a thread. */ -int -vm_thread_new(struct thread *td, int pages) +static vm_offset_t +vm_thread_stack_create(struct domainset *ds, vm_object_t *ksobjp, int pages) { + vm_page_t ma[KSTACK_MAX_PAGES]; vm_object_t ksobj; vm_offset_t ks; - vm_page_t ma[KSTACK_MAX_PAGES]; - struct kstack_cache_entry *ks_ce; int i; - /* Bounds check */ - if (pages <= 1) - pages = kstack_pages; - else if (pages > KSTACK_MAX_PAGES) - pages = KSTACK_MAX_PAGES; - - if (pages == kstack_pages && kstack_cache != NULL) { - mtx_lock(&kstack_cache_mtx); - if (kstack_cache != NULL) { - ks_ce = kstack_cache; - kstack_cache = ks_ce->next_ks_entry; - mtx_unlock(&kstack_cache_mtx); - - td->td_kstack_obj = ks_ce->ksobj; - td->td_kstack = (vm_offset_t)ks_ce; - td->td_kstack_pages = kstack_pages; - return (1); - } - mtx_unlock(&kstack_cache_mtx); - } - /* * Allocate an object for the kstack. */ ksobj = vm_object_allocate(OBJT_DEFAULT, pages); /* * Get a kernel virtual address for this thread's kstack. */ #if defined(__mips__) /* * We need to align the kstack's mapped address to fit within * a single TLB entry. */ if (vmem_xalloc(kernel_arena, (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE, PAGE_SIZE * 2, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_BESTFIT | M_NOWAIT, &ks)) { ks = 0; } #else ks = kva_alloc((pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); #endif if (ks == 0) { printf("vm_thread_new: kstack allocation failed\n"); vm_object_deallocate(ksobj); return (0); } - - /* - * Ensure that kstack objects can draw pages from any memory - * domain. Otherwise a local memory shortage can block a process - * swap-in. - */ if (vm_ndomains > 1) { - ksobj->domain.dr_policy = DOMAINSET_RR(); + ksobj->domain.dr_policy = ds; ksobj->domain.dr_iter = atomic_fetchadd_int(&kstack_domain_iter, 1); } - atomic_add_int(&kstacks, 1); if (KSTACK_GUARD_PAGES != 0) { pmap_qremove(ks, KSTACK_GUARD_PAGES); ks += KSTACK_GUARD_PAGES * PAGE_SIZE; } - td->td_kstack_obj = ksobj; - td->td_kstack = ks; - /* - * Knowing the number of pages allocated is useful when you - * want to deallocate them. - */ - td->td_kstack_pages = pages; + /* * For the length of the stack, link in a real page of ram for each * page of stack. */ VM_OBJECT_WLOCK(ksobj); (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED, ma, pages); for (i = 0; i < pages; i++) ma[i]->valid = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(ks, ma, pages); - return (1); + *ksobjp = ksobj; + + return (ks); } static void vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) { vm_page_t m; int i; - atomic_add_int(&kstacks, -1); pmap_qremove(ks, pages); VM_OBJECT_WLOCK(ksobj); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); vm_page_lock(m); vm_page_unwire_noq(m); vm_page_free(m); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); vm_object_deallocate(ksobj); kva_free(ks - (KSTACK_GUARD_PAGES * PAGE_SIZE), (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE); } /* + * Allocate the kernel stack for a new thread. + */ +int +vm_thread_new(struct thread *td, int pages) +{ + vm_object_t ksobj; + vm_offset_t ks; + + /* Bounds check */ + if (pages <= 1) + pages = kstack_pages; + else if (pages > KSTACK_MAX_PAGES) + pages = KSTACK_MAX_PAGES; + + ks = 0; + ksobj = NULL; + if (pages == kstack_pages && kstack_cache != NULL) { + ks = (vm_offset_t)uma_zalloc(kstack_cache, M_NOWAIT); + if (ks != 0) + ksobj = PHYS_TO_VM_PAGE(pmap_kextract(ks))->object; + } + + /* + * Ensure that kstack objects can draw pages from any memory + * domain. Otherwise a local memory shortage can block a process + * swap-in. + */ + if (ks == 0) + ks = vm_thread_stack_create(DOMAINSET_PREF(PCPU_GET(domain)), + &ksobj, pages); + if (ks == 0) + return (0); + td->td_kstack_obj = ksobj; + td->td_kstack = ks; + td->td_kstack_pages = pages; + return (1); +} + +/* * Dispose of a thread's kernel stack. */ void vm_thread_dispose(struct thread *td) { vm_object_t ksobj; vm_offset_t ks; - struct kstack_cache_entry *ks_ce; int pages; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; ks = td->td_kstack; td->td_kstack = 0; td->td_kstack_pages = 0; - if (pages == kstack_pages && kstacks <= kstack_cache_size) { - ks_ce = (struct kstack_cache_entry *)ks; - ks_ce->ksobj = ksobj; - mtx_lock(&kstack_cache_mtx); - ks_ce->next_ks_entry = kstack_cache; - kstack_cache = ks_ce; - mtx_unlock(&kstack_cache_mtx); - return; + if (pages == kstack_pages) + uma_zfree(kstack_cache, (void *)ks); + else + vm_thread_stack_dispose(ksobj, ks, pages); +} + +static int +kstack_import(void *arg, void **store, int cnt, int domain, int flags) +{ + vm_object_t ksobj; + int i; + + for (i = 0; i < cnt; i++) { + store[i] = (void *)vm_thread_stack_create( + DOMAINSET_PREF(domain), &ksobj, kstack_pages); + if (store[i] == NULL) + break; } - vm_thread_stack_dispose(ksobj, ks, pages); + return (i); } static void -vm_thread_stack_lowmem(void *nulll) +kstack_release(void *arg, void **store, int cnt) { - struct kstack_cache_entry *ks_ce, *ks_ce1; + vm_offset_t ks; + int i; - mtx_lock(&kstack_cache_mtx); - ks_ce = kstack_cache; - kstack_cache = NULL; - mtx_unlock(&kstack_cache_mtx); - - while (ks_ce != NULL) { - ks_ce1 = ks_ce; - ks_ce = ks_ce->next_ks_entry; - - vm_thread_stack_dispose(ks_ce1->ksobj, (vm_offset_t)ks_ce1, - kstack_pages); + for (i = 0; i < cnt; i++) { + ks = (vm_offset_t)store[i]; + vm_thread_stack_dispose( + PHYS_TO_VM_PAGE(pmap_kextract(ks))->object, + ks, kstack_pages); } } static void -kstack_cache_init(void *nulll) +kstack_cache_init(void *null) { - - EVENTHANDLER_REGISTER(vm_lowmem, vm_thread_stack_lowmem, NULL, - EVENTHANDLER_PRI_ANY); + kstack_cache = uma_zcache_create("kstack_cache", + kstack_pages * PAGE_SIZE, NULL, NULL, NULL, NULL, + kstack_import, kstack_release, NULL, + UMA_ZONE_NUMA|UMA_ZONE_MINBUCKET); + uma_zone_set_maxcache(kstack_cache, kstack_cache_size); } SYSINIT(vm_kstacks, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, kstack_cache_init, NULL); #ifdef KSTACK_USAGE_PROF /* * Track maximum stack used by a thread in kernel. */ static int max_kstack_used; SYSCTL_INT(_debug, OID_AUTO, max_kstack_used, CTLFLAG_RD, &max_kstack_used, 0, "Maxiumum stack depth used by a thread in kernel"); void intr_prof_stack_use(struct thread *td, struct trapframe *frame) { vm_offset_t stack_top; vm_offset_t current; int used, prev_used; /* * Testing for interrupted kernel mode isn't strictly * needed. It optimizes the execution, since interrupts from * usermode will have only the trap frame on the stack. */ if (TRAPF_USERMODE(frame)) return; stack_top = td->td_kstack + td->td_kstack_pages * PAGE_SIZE; current = (vm_offset_t)(uintptr_t)&stack_top; /* * Try to detect if interrupt is using kernel thread stack. * Hardware could use a dedicated stack for interrupt handling. */ if (stack_top <= current || current < td->td_kstack) return; used = stack_top - current; for (;;) { prev_used = max_kstack_used; if (prev_used >= used) break; if (atomic_cmpset_int(&max_kstack_used, prev_used, used)) break; } } #endif /* KSTACK_USAGE_PROF */ /* * Implement fork's actions on an address space. * Here we arrange for the address space to be copied or referenced, * allocate a user struct (pcb and kernel stack), then call the * machine-dependent layer to fill those in and make the new process * ready to run. The new process is set up so that it returns directly * to user mode to avoid stack copying and relocation problems. */ int vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2, struct vmspace *vm2, int flags) { struct proc *p1 = td->td_proc; struct domainset *dset; int error; if ((flags & RFPROC) == 0) { /* * Divorce the memory, if it is shared, essentially * this changes shared memory amongst threads, into * COW locally. */ if ((flags & RFMEM) == 0) { if (p1->p_vmspace->vm_refcnt > 1) { error = vmspace_unshare(p1); if (error) return (error); } } cpu_fork(td, p2, td2, flags); return (0); } if (flags & RFMEM) { p2->p_vmspace = p1->p_vmspace; atomic_add_int(&p1->p_vmspace->vm_refcnt, 1); } dset = td2->td_domain.dr_policy; while (vm_page_count_severe_set(&dset->ds_mask)) { vm_wait_doms(&dset->ds_mask); } if ((flags & RFMEM) == 0) { p2->p_vmspace = vm2; if (p1->p_vmspace->vm_shm) shmfork(p1, p2); } /* * cpu_fork will copy and update the pcb, set up the kernel stack, * and make the child ready to run. */ cpu_fork(td, p2, td2, flags); return (0); } /* * Called after process has been wait(2)'ed upon and is being reaped. * The idea is to reclaim resources that we could not reclaim while * the process was still executing. */ void vm_waitproc(p) struct proc *p; { vmspace_exitfree(p); /* and clean-out the vmspace */ } void kick_proc0(void) { wakeup(&proc0); } Index: head/sys/vm/vm_param.h =================================================================== --- head/sys/vm/vm_param.h (revision 350662) +++ head/sys/vm/vm_param.h (revision 350663) @@ -1,137 +1,141 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_param.h 8.1 (Berkeley) 6/11/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ /* * Machine independent virtual memory parameters. */ #ifndef _VM_PARAM_ #define _VM_PARAM_ #include /* * CTL_VM identifiers */ #define VM_TOTAL 1 /* struct vmtotal */ #define VM_METER VM_TOTAL/* deprecated, use VM_TOTAL */ #define VM_LOADAVG 2 /* struct loadavg */ #define VM_V_FREE_MIN 3 /* vm_cnt.v_free_min */ #define VM_V_FREE_TARGET 4 /* vm_cnt.v_free_target */ #define VM_V_FREE_RESERVED 5 /* vm_cnt.v_free_reserved */ #define VM_V_INACTIVE_TARGET 6 /* vm_cnt.v_inactive_target */ #define VM_OBSOLETE_7 7 /* unused, formerly v_cache_min */ #define VM_OBSOLETE_8 8 /* unused, formerly v_cache_max */ #define VM_V_PAGEOUT_FREE_MIN 9 /* vm_cnt.v_pageout_free_min */ #define VM_OBSOLETE_10 10 /* pageout algorithm */ #define VM_SWAPPING_ENABLED 11 /* swapping enabled */ #define VM_OVERCOMMIT 12 /* vm.overcommit */ #define VM_MAXID 13 /* number of valid vm ids */ /* * Structure for swap device statistics */ #define XSWDEV_VERSION 2 struct xswdev { u_int xsw_version; dev_t xsw_dev; int xsw_flags; int xsw_nblks; int xsw_used; }; /* * Return values from the VM routines. */ #define KERN_SUCCESS 0 #define KERN_INVALID_ADDRESS 1 #define KERN_PROTECTION_FAILURE 2 #define KERN_NO_SPACE 3 #define KERN_INVALID_ARGUMENT 4 #define KERN_FAILURE 5 #define KERN_RESOURCE_SHORTAGE 6 #define KERN_NOT_RECEIVER 7 #define KERN_NO_ACCESS 8 #ifndef PA_LOCK_COUNT #ifdef SMP #define PA_LOCK_COUNT 32 #else #define PA_LOCK_COUNT 1 #endif /* !SMP */ #endif /* !PA_LOCK_COUNT */ +#ifndef KSTACK_MAX_PAGES +#define KSTACK_MAX_PAGES 32 +#endif + #ifndef ASSEMBLER #ifdef _KERNEL #define num_pages(x) \ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT)) extern unsigned long maxtsiz; extern unsigned long dfldsiz; extern unsigned long maxdsiz; extern unsigned long dflssiz; extern unsigned long maxssiz; extern unsigned long sgrowsiz; #endif /* _KERNEL */ #endif /* ASSEMBLER */ #endif /* _VM_PARAM_ */ Index: head/sys/vm/vm_swapout.c =================================================================== --- head/sys/vm/vm_swapout.c (revision 350662) +++ head/sys/vm/vm_swapout.c (revision 350663) @@ -1,963 +1,962 @@ /*- * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU) * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2005 Yahoo! Technologies Norway AS * All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); #include "opt_kstack_pages.h" #include "opt_kstack_max_pages.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* the kernel process "vm_daemon" */ static void vm_daemon(void); static struct proc *vmproc; static struct kproc_desc vm_kp = { "vmdaemon", vm_daemon, &vmproc }; SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); /* * Swap_idle_threshold1 is the guaranteed swapped in time for a process */ static int swap_idle_threshold1 = 2; SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW, &swap_idle_threshold1, 0, "Guaranteed swapped in time for a process"); /* * Swap_idle_threshold2 is the time that a process can be idle before * it will be swapped out, if idle swapping is enabled. */ static int swap_idle_threshold2 = 10; SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW, &swap_idle_threshold2, 0, "Time before a process will be swapped out"); static int vm_pageout_req_swapout; /* XXX */ static int vm_daemon_needed; static struct mtx vm_daemon_mtx; /* Allow for use by vm_pageout before vm_daemon is initialized. */ MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); static int swapped_cnt; static int swap_inprogress; /* Pending swap-ins done outside swapper. */ static int last_swapin; static void swapclear(struct proc *); static int swapout(struct proc *); static void vm_swapout_map_deactivate_pages(vm_map_t, long); static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long); static void swapout_procs(int action); static void vm_req_vmdaemon(int req); static void vm_thread_swapout(struct thread *td); /* * vm_swapout_object_deactivate_pages * * Deactivate enough pages to satisfy the inactive target * requirements. * * The object and map must be locked. */ static void vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, long desired) { vm_object_t backing_object, object; vm_page_t p; int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) return; for (object = first_object;; object = backing_object) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; VM_OBJECT_ASSERT_LOCKED(object); if ((object->flags & OBJ_UNMANAGED) != 0 || object->paging_in_progress != 0) goto unlock_return; remove_mode = 0; if (object->shadow_count > 1) remove_mode = 1; /* * Scan the object's entire memory queue. */ TAILQ_FOREACH(p, &object->memq, listq) { if (pmap_resident_count(pmap) <= desired) goto unlock_return; if (should_yield()) goto unlock_return; if (vm_page_busied(p)) continue; VM_CNT_INC(v_pdpages); vm_page_lock(p); if (vm_page_wired(p) || !pmap_page_exists_quick(pmap, p)) { vm_page_unlock(p); continue; } act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { if (act_delta == 0) act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } if (!vm_page_active(p) && act_delta != 0) { vm_page_activate(p); p->act_count += act_delta; } else if (vm_page_active(p)) { if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else vm_page_requeue(p); } else { vm_page_activate(p); if (p->act_count < ACT_MAX - ACT_ADVANCE) p->act_count += ACT_ADVANCE; vm_page_requeue(p); } } else if (vm_page_inactive(p)) pmap_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL) goto unlock_return; VM_OBJECT_RLOCK(backing_object); if (object != first_object) VM_OBJECT_RUNLOCK(object); } unlock_return: if (object != first_object) VM_OBJECT_RUNLOCK(object); } /* * deactivate some number of pages in a map, try to do it fairly, but * that is really hard to do. */ static void vm_swapout_map_deactivate_pages(vm_map_t map, long desired) { vm_map_entry_t tmpe; vm_object_t obj, bigobj; int nothingwired; if (!vm_map_trylock_read(map)) return; bigobj = NULL; nothingwired = TRUE; /* * first, search out the biggest object, and try to free pages from * that. */ tmpe = map->header.next; while (tmpe != &map->header) { if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { if (obj->shadow_count <= 1 && (bigobj == NULL || bigobj->resident_page_count < obj->resident_page_count)) { if (bigobj != NULL) VM_OBJECT_RUNLOCK(bigobj); bigobj = obj; } else VM_OBJECT_RUNLOCK(obj); } } if (tmpe->wired_count > 0) nothingwired = FALSE; tmpe = tmpe->next; } if (bigobj != NULL) { vm_swapout_object_deactivate_pages(map->pmap, bigobj, desired); VM_OBJECT_RUNLOCK(bigobj); } /* * Next, hunt around for other pages to deactivate. We actually * do this search sort of wrong -- .text first is not the best idea. */ tmpe = map->header.next; while (tmpe != &map->header) { if (pmap_resident_count(vm_map_pmap(map)) <= desired) break; if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { obj = tmpe->object.vm_object; if (obj != NULL) { VM_OBJECT_RLOCK(obj); vm_swapout_object_deactivate_pages(map->pmap, obj, desired); VM_OBJECT_RUNLOCK(obj); } } tmpe = tmpe->next; } /* * Remove all mappings if a process is swapped out, this will free page * table pages. */ if (desired == 0 && nothingwired) { pmap_remove(vm_map_pmap(map), vm_map_min(map), vm_map_max(map)); } vm_map_unlock_read(map); } /* * Swap out requests */ #define VM_SWAP_NORMAL 1 #define VM_SWAP_IDLE 2 void vm_swapout_run(void) { if (vm_swap_enabled) vm_req_vmdaemon(VM_SWAP_NORMAL); } /* * Idle process swapout -- run once per second when pagedaemons are * reclaiming pages. */ void vm_swapout_run_idle(void) { static long lsec; if (!vm_swap_idle_enabled || time_second == lsec) return; vm_req_vmdaemon(VM_SWAP_IDLE); lsec = time_second; } static void vm_req_vmdaemon(int req) { static int lastrun = 0; mtx_lock(&vm_daemon_mtx); vm_pageout_req_swapout |= req; if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { wakeup(&vm_daemon_needed); lastrun = ticks; } mtx_unlock(&vm_daemon_mtx); } static void vm_daemon(void) { struct rlimit rsslim; struct proc *p; struct thread *td; struct vmspace *vm; int breakout, swapout_flags, tryagain, attempts; #ifdef RACCT uint64_t rsize, ravailable; #endif while (TRUE) { mtx_lock(&vm_daemon_mtx); msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", #ifdef RACCT racct_enable ? hz : 0 #else 0 #endif ); swapout_flags = vm_pageout_req_swapout; vm_pageout_req_swapout = 0; mtx_unlock(&vm_daemon_mtx); if (swapout_flags != 0) { /* * Drain the per-CPU page queue batches as a deadlock * avoidance measure. */ if ((swapout_flags & VM_SWAP_NORMAL) != 0) vm_page_drain_pqbatch(); swapout_procs(swapout_flags); } /* * scan the processes for exceeding their rlimits or if * process is swapped out -- deactivate pages */ tryagain = 0; attempts = 0; again: attempts++; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /* * if this is a system process or if we have already * looked at this process, skip it. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { PROC_UNLOCK(p); continue; } /* * if the process is in a non-running type state, * don't touch it. */ breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td) && !TD_IS_SUSPENDED(td)) { thread_unlock(td); breakout = 1; break; } thread_unlock(td); } if (breakout) { PROC_UNLOCK(p); continue; } /* * get a limit */ lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); limit = OFF_TO_IDX( qmin(rsslim.rlim_cur, rsslim.rlim_max)); /* * let processes that are swapped out really be * swapped out set the limit to nothing (will force a * swap-out.) */ if ((p->p_flag & P_INMEM) == 0) limit = 0; /* XXX */ vm = vmspace_acquire_ref(p); _PHOLD_LITE(p); PROC_UNLOCK(p); if (vm == NULL) { PRELE(p); continue; } sx_sunlock(&allproc_lock); size = vmspace_resident_count(vm); if (size >= limit) { vm_swapout_map_deactivate_pages( &vm->vm_map, limit); size = vmspace_resident_count(vm); } #ifdef RACCT if (racct_enable) { rsize = IDX_TO_OFF(size); PROC_LOCK(p); if (p->p_state == PRS_NORMAL) racct_set(p, RACCT_RSS, rsize); ravailable = racct_get_available(p, RACCT_RSS); PROC_UNLOCK(p); if (rsize > ravailable) { /* * Don't be overly aggressive; this * might be an innocent process, * and the limit could've been exceeded * by some memory hog. Don't try * to deactivate more than 1/4th * of process' resident set size. */ if (attempts <= 8) { if (ravailable < rsize - (rsize / 4)) { ravailable = rsize - (rsize / 4); } } vm_swapout_map_deactivate_pages( &vm->vm_map, OFF_TO_IDX(ravailable)); /* Update RSS usage after paging out. */ size = vmspace_resident_count(vm); rsize = IDX_TO_OFF(size); PROC_LOCK(p); if (p->p_state == PRS_NORMAL) racct_set(p, RACCT_RSS, rsize); PROC_UNLOCK(p); if (rsize > ravailable) tryagain = 1; } } #endif vmspace_free(vm); sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); if (tryagain != 0 && attempts <= 10) { maybe_yield(); goto again; } } } /* * Allow a thread's kernel stack to be paged out. */ static void vm_thread_swapout(struct thread *td) { vm_object_t ksobj; vm_page_t m; int i, pages; cpu_thread_swapout(td); pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; pmap_qremove(td->td_kstack, pages); VM_OBJECT_WLOCK(ksobj); for (i = 0; i < pages; i++) { m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); vm_page_dirty(m); vm_page_lock(m); vm_page_unwire(m, PQ_LAUNDRY); vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); } /* * Bring the kernel stack for a specified thread back in. */ static void vm_thread_swapin(struct thread *td, int oom_alloc) { vm_object_t ksobj; vm_page_t ma[KSTACK_MAX_PAGES]; int a, count, i, j, pages, rv; pages = td->td_kstack_pages; ksobj = td->td_kstack_obj; VM_OBJECT_WLOCK(ksobj); (void)vm_page_grab_pages(ksobj, 0, oom_alloc | VM_ALLOC_WIRED, ma, pages); for (i = 0; i < pages;) { vm_page_assert_xbusied(ma[i]); if (ma[i]->valid == VM_PAGE_BITS_ALL) { vm_page_xunbusy(ma[i]); i++; continue; } vm_object_pip_add(ksobj, 1); for (j = i + 1; j < pages; j++) if (ma[j]->valid == VM_PAGE_BITS_ALL) break; rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); count = min(a + 1, j - i); rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL); KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d", __func__, td->td_proc->p_pid)); vm_object_pip_wakeup(ksobj); for (j = i; j < i + count; j++) vm_page_xunbusy(ma[j]); i += count; } VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(td->td_kstack, ma, pages); cpu_thread_swapin(td); } void faultin(struct proc *p) { struct thread *td; int oom_alloc; PROC_LOCK_ASSERT(p, MA_OWNED); /* * If another process is swapping in this process, * just wait until it finishes. */ if (p->p_flag & P_SWAPPINGIN) { while (p->p_flag & P_SWAPPINGIN) msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0); return; } if ((p->p_flag & P_INMEM) == 0) { oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; /* * Don't let another thread swap process p out while we are * busy swapping it in. */ ++p->p_lock; p->p_flag |= P_SWAPPINGIN; PROC_UNLOCK(p); sx_xlock(&allproc_lock); MPASS(swapped_cnt > 0); swapped_cnt--; if (curthread != &thread0) swap_inprogress++; sx_xunlock(&allproc_lock); /* * We hold no lock here because the list of threads * can not change while all threads in the process are * swapped out. */ FOREACH_THREAD_IN_PROC(p, td) vm_thread_swapin(td, oom_alloc); if (curthread != &thread0) { sx_xlock(&allproc_lock); MPASS(swap_inprogress > 0); swap_inprogress--; last_swapin = ticks; sx_xunlock(&allproc_lock); } PROC_LOCK(p); swapclear(p); p->p_swtick = ticks; /* Allow other threads to swap p out now. */ wakeup(&p->p_flag); --p->p_lock; } } /* * This swapin algorithm attempts to swap-in processes only if there * is enough space for them. Of course, if a process waits for a long * time, it will be swapped in anyway. */ static struct proc * swapper_selector(bool wkilled_only) { struct proc *p, *res; struct thread *td; int ppri, pri, slptime, swtime; sx_assert(&allproc_lock, SA_SLOCKED); if (swapped_cnt == 0) return (NULL); res = NULL; ppri = INT_MIN; FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) != 0) { PROC_UNLOCK(p); continue; } if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) { /* * A swapped-out process might have mapped a * large portion of the system's pages as * anonymous memory. There is no other way to * release the memory other than to kill the * process, for which we need to swap it in. */ return (p); } if (wkilled_only) { PROC_UNLOCK(p); continue; } swtime = (ticks - p->p_swtick) / hz; FOREACH_THREAD_IN_PROC(p, td) { /* * An otherwise runnable thread of a process * swapped out has only the TDI_SWAPPED bit set. */ thread_lock(td); if (td->td_inhibitors == TDI_SWAPPED) { slptime = (ticks - td->td_slptick) / hz; pri = swtime + slptime; if ((td->td_flags & TDF_SWAPINREQ) == 0) pri -= p->p_nice * 8; /* * if this thread is higher priority * and there is enough space, then select * this process instead of the previous * selection. */ if (pri > ppri) { res = p; ppri = pri; } } thread_unlock(td); } PROC_UNLOCK(p); } if (res != NULL) PROC_LOCK(res); return (res); } #define SWAPIN_INTERVAL (MAXSLP * hz / 2) /* * Limit swapper to swap in one non-WKILLED process in MAXSLP/2 * interval, assuming that there is: * - at least one domain that is not suffering from a shortage of free memory; * - no parallel swap-ins; * - no other swap-ins in the current SWAPIN_INTERVAL. */ static bool swapper_wkilled_only(void) { return (vm_page_count_min_set(&all_domains) || swap_inprogress > 0 || (u_int)(ticks - last_swapin) < SWAPIN_INTERVAL); } void swapper(void) { struct proc *p; for (;;) { sx_slock(&allproc_lock); p = swapper_selector(swapper_wkilled_only()); sx_sunlock(&allproc_lock); if (p == NULL) { tsleep(&proc0, PVM, "swapin", SWAPIN_INTERVAL); } else { PROC_LOCK_ASSERT(p, MA_OWNED); /* * Another process may be bringing or may have * already brought this process in while we * traverse all threads. Or, this process may * have exited or even being swapped out * again. */ if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) { faultin(p); } PROC_UNLOCK(p); } } } /* * First, if any processes have been sleeping or stopped for at least * "swap_idle_threshold1" seconds, they are swapped out. If, however, * no such processes exist, then the longest-sleeping or stopped * process is swapped out. Finally, and only as a last resort, if * there are no sleeping or stopped processes, the longest-resident * process is swapped out. */ static void swapout_procs(int action) { struct proc *p; struct thread *td; int slptime; bool didswap, doswap; MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0); didswap = false; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { /* * Filter out not yet fully constructed processes. Do * not swap out held processes. Avoid processes which * are system, exiting, execing, traced, already swapped * out or are in the process of being swapped in or out. */ PROC_LOCK(p); if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag & (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE | P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) != P_INMEM) { PROC_UNLOCK(p); continue; } /* * Further consideration of this process for swap out * requires iterating over its threads. We release * allproc_lock here so that process creation and * destruction are not blocked while we iterate. * * To later reacquire allproc_lock and resume * iteration over the allproc list, we will first have * to release the lock on the process. We place a * hold on the process so that it remains in the * allproc list while it is unlocked. */ _PHOLD_LITE(p); sx_sunlock(&allproc_lock); /* * Do not swapout a realtime process. * Guarantee swap_idle_threshold1 time in memory. * If the system is under memory stress, or if we are * swapping idle processes >= swap_idle_threshold2, * then swap the process out. */ doswap = true; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); slptime = (ticks - td->td_slptick) / hz; if (PRI_IS_REALTIME(td->td_pri_class) || slptime < swap_idle_threshold1 || !thread_safetoswapout(td) || ((action & VM_SWAP_NORMAL) == 0 && slptime < swap_idle_threshold2)) doswap = false; thread_unlock(td); if (!doswap) break; } if (doswap && swapout(p) == 0) didswap = true; PROC_UNLOCK(p); if (didswap) { sx_xlock(&allproc_lock); swapped_cnt++; sx_downgrade(&allproc_lock); } else sx_slock(&allproc_lock); PRELE(p); } sx_sunlock(&allproc_lock); /* * If we swapped something out, and another process needed memory, * then wakeup the sched process. */ if (didswap) wakeup(&proc0); } static void swapclear(struct proc *p) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); td->td_flags |= TDF_INMEM; td->td_flags &= ~TDF_SWAPINREQ; TD_CLR_SWAPPED(td); if (TD_CAN_RUN(td)) if (setrunnable(td)) { #ifdef INVARIANTS /* * XXX: We just cleared TDI_SWAPPED * above and set TDF_INMEM, so this * should never happen. */ panic("not waking up swapper"); #endif } thread_unlock(td); } p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT); p->p_flag |= P_INMEM; } static int swapout(struct proc *p) { struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); /* * The states of this process and its threads may have changed * by now. Assuming that there is only one pageout daemon thread, * this process should still be in memory. */ KASSERT((p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) == P_INMEM, ("swapout: lost a swapout race?")); /* * Remember the resident count. */ p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace); /* * Check and mark all threads before we proceed. */ p->p_flag &= ~P_INMEM; p->p_flag |= P_SWAPPINGOUT; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); if (!thread_safetoswapout(td)) { thread_unlock(td); swapclear(p); return (EBUSY); } td->td_flags &= ~TDF_INMEM; TD_SET_SWAPPED(td); thread_unlock(td); } td = FIRST_THREAD_IN_PROC(p); ++td->td_ru.ru_nswap; PROC_UNLOCK(p); /* * This list is stable because all threads are now prevented from * running. The list is only modified in the context of a running * thread in this process. */ FOREACH_THREAD_IN_PROC(p, td) vm_thread_swapout(td); PROC_LOCK(p); p->p_flag &= ~P_SWAPPINGOUT; p->p_swtick = ticks; return (0); }