diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 205848489644..880c46bba84d 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -1,310 +1,304 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifdef __i386__ #include <i386/vmparam.h> #else /* !__i386__ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ 1 /* * Machine dependent constants for AMD64. */ /* * Virtual memory related constants, all in bytes */ #define MAXTSIZ (32768UL*1024*1024) /* max text size */ #ifndef DFLDSIZ #define DFLDSIZ (32768UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (32768UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (512UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * We provide a machine specific single page allocator through the use * of the direct mapped segment. This uses 2MB pages for reduced * TLB pressure. */ #if !defined(KASAN) && !defined(KMSAN) #define UMA_MD_SMALL_ALLOC #endif /* * The physical address space is densely populated. */ #define VM_PHYSSEG_DENSE /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries.
*/ #define VM_PHYSSEG_MAX 63 /* * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for page tables and small UMA * objects are allocated. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* * Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages * that have physical addresses below 4G but are not accessible by ISA DMA, * and VM_FREELIST_LOWMEM is for physical pages that are accessible by ISA * DMA. */ #define VM_NFREELIST 3 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_DMA32 1 #define VM_FREELIST_LOWMEM 2 #define VM_LOWMEM_BOUNDARY (16 << 20) /* 16MB ISA DMA limit */ /* * Create the DMA32 free list only if the number of physical pages above * physical address 4G is at least 16M, which amounts to 64GB of physical * memory. */ #define VM_DMA32_NPAGES_THRESHOLD 16777216 /* * An allocation size of 16MB is supported in order to optimize the * use of the direct map by UMA. Specifically, a cache line contains * at most 8 PDEs, collectively mapping 16MB of physical memory. By * reducing the number of distinct 16MB "pages" that are used by UMA, * the physical memory allocator reduces the likelihood of both 2MB * page TLB misses and cache misses caused by 2MB page TLB misses. */ #define VM_NFREEORDER 13 /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages. */ #ifndef VM_LEVEL_0_ORDER #define VM_LEVEL_0_ORDER 9 #endif #ifdef SMP #define PA_LOCK_COUNT 256 #endif /* * Kernel physical load address for non-UEFI boot and for legacy UEFI loader. * Newer UEFI loader loads kernel anywhere below 4G, with memory allocated * by boot services. * Needs to be aligned at 2MB superpage boundary. */ #ifndef KERNLOAD #define KERNLOAD 0x200000 #endif
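As a sanity check on the sizing constants above, here is a standalone sketch (not part of the header; PAGE_SIZE is assumed to be the amd64 4 KB base page, and the macro values are copied from the diff). The largest buddy block has order VM_NFREEORDER - 1 = 12, i.e. 4096 pages = 16MB, and a level 0 reservation of 2^VM_LEVEL_0_ORDER = 512 pages is exactly one 2MB superpage, matching the KERNLOAD alignment:

    #include <assert.h>

    #define PAGE_SIZE        4096UL
    #define VM_NFREEORDER    13
    #define VM_LEVEL_0_ORDER 9

    int
    main(void)
    {
    	/* Largest free-list block: 2^12 pages = 16MB. */
    	assert((1UL << (VM_NFREEORDER - 1)) * PAGE_SIZE == 16UL * 1024 * 1024);
    	/* Level 0 reservation: 2^9 pages = one 2MB superpage. */
    	assert((1UL << VM_LEVEL_0_ORDER) * PAGE_SIZE == 2UL * 1024 * 1024);
    	return (0);
    }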
/* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) * 0xffff804020101000 - 0xffff807fffffffff unused * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up) * 0xffff848000000000 - 0xfffff77fffffffff unused (large map extends there) * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional * 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map * * Within the kernel map: * * 0xfffffe0000000000 vm_page_array * 0xffffffff80000000 KERNBASE */ #define VM_MIN_KERNEL_ADDRESS KV4ADDR(KPML4BASE, 0, 0, 0) #define VM_MAX_KERNEL_ADDRESS KV4ADDR(KPML4BASE + NKPML4E - 1, \ NPDPEPG-1, NPDEPG-1, NPTEPG-1) #define DMAP_MIN_ADDRESS KV4ADDR(DMPML4I, 0, 0, 0) #define DMAP_MAX_ADDRESS KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0) #define KASAN_MIN_ADDRESS KV4ADDR(KASANPML4I, 0, 0, 0) #define KASAN_MAX_ADDRESS KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0) #define KMSAN_SHAD_MIN_ADDRESS KV4ADDR(KMSANSHADPML4I, 0, 0, 0) #define KMSAN_SHAD_MAX_ADDRESS KV4ADDR(KMSANSHADPML4I + NKMSANSHADPML4E, \ 0, 0, 0) #define KMSAN_ORIG_MIN_ADDRESS KV4ADDR(KMSANORIGPML4I, 0, 0, 0) #define KMSAN_ORIG_MAX_ADDRESS KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E, \ 0, 0, 0) #define LARGEMAP_MIN_ADDRESS KV4ADDR(LMSPML4I, 0, 0, 0) #define LARGEMAP_MAX_ADDRESS KV4ADDR(LMEPML4I + 1, 0, 0, 0) /* * Formally kernel mapping starts at KERNBASE, but kernel linker * script leaves first PDE reserved. For legacy BIOS boot, kernel is * loaded at KERNLOAD = 2M, and initial kernel page table maps * physical memory from zero to KERNend starting at KERNBASE. * * KERNSTART is where the first actual kernel page is mapped, after * the compatibility mapping. */ #define KERNBASE KV4ADDR(KPML4I, KPDPI, 0, 0) #define KERNSTART (KERNBASE + NBPDR) #define UPT_MAX_ADDRESS KV4ADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I) #define UPT_MIN_ADDRESS KV4ADDR(PML4PML4I, 0, 0, 0) #define VM_MAXUSER_ADDRESS_LA57 UVADDR(NUPML5E, 0, 0, 0, 0) #define VM_MAXUSER_ADDRESS_LA48 UVADDR(0, NUP4ML4E, 0, 0, 0) #define VM_MAXUSER_ADDRESS VM_MAXUSER_ADDRESS_LA57 #define SHAREDPAGE_LA57 (VM_MAXUSER_ADDRESS_LA57 - PAGE_SIZE) #define SHAREDPAGE_LA48 (VM_MAXUSER_ADDRESS_LA48 - PAGE_SIZE) #define USRSTACK_LA57 SHAREDPAGE_LA57 #define USRSTACK_LA48 SHAREDPAGE_LA48 #define USRSTACK USRSTACK_LA48 #define PS_STRINGS_LA57 (USRSTACK_LA57 - sizeof(struct ps_strings)) #define PS_STRINGS_LA48 (USRSTACK_LA48 - sizeof(struct ps_strings)) #define VM_MAX_ADDRESS UPT_MAX_ADDRESS #define VM_MIN_ADDRESS (0)
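To make the user address-space constants above concrete, a standalone sketch (assuming 4 KB pages and the canonical 47-bit LA48 user limit, so VM_MAXUSER_ADDRESS_LA48 = 2^47; everything else here is illustrative rather than copied from pmap.h):

    #include <inttypes.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096ULL
    #define VM_MAXUSER_ADDRESS_LA48 (1ULL << 47)

    int
    main(void)
    {
    	/* The shared page occupies the top page of user VA... */
    	uint64_t sharedpage = VM_MAXUSER_ADDRESS_LA48 - PAGE_SIZE;
    	/* ...and the user stack grows down from the same address. */
    	uint64_t usrstack = sharedpage;

    	printf("SHAREDPAGE_LA48 %#" PRIx64 "\n", sharedpage); /* 0x7ffffffff000 */
    	printf("USRSTACK_LA48   %#" PRIx64 "\n", usrstack);
    	return (0);
    }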
/* * XXX Allowing dmaplimit == 0 is a temporary workaround for vt(4) efifb's * early use of PHYS_TO_DMAP before the mapping is actually setup. This works * because the result is not actually accessed until later, but the early * vt fb startup needs to be reworked. */ #define PHYS_IN_DMAP(pa) (dmaplimit == 0 || (pa) < dmaplimit) #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ (va) < (DMAP_MIN_ADDRESS + dmaplimit)) #define PMAP_HAS_DMAP 1 #define PHYS_TO_DMAP(x) ({ \ KASSERT(PHYS_IN_DMAP(x), \ ("physical address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ (x) | DMAP_MIN_ADDRESS; }) #define DMAP_TO_PHYS(x) ({ \ KASSERT(VIRT_IN_DMAP(x), \ ("virtual address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ (x) & ~DMAP_MIN_ADDRESS; }) /* * amd64 maps the page array into KVA so that it can be more easily * allocated on the correct memory domains. */ #define PMAP_HAS_PAGE_ARRAY 1 /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (1) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) #endif /* initial pagein size of beginning of executable file */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */ -/* - * Use a fairly large batch size since we expect amd64 systems to have lots of - * memory. - */ -#define VM_BATCHQUEUE_SIZE 63 - /* * The pmap can create non-transparent large page mappings. */ #define PMAP_HAS_LARGEPAGES 1 /* * Need a page dump array for minidump. */ #define MINIDUMP_PAGE_TRACKING 1 #endif /* _MACHINE_VMPARAM_H_ */ #endif /* __i386__ */
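A standalone sketch of why the OR/AND pair in PHYS_TO_DMAP()/DMAP_TO_PHYS() above is a correct translation: DMAP_MIN_ADDRESS (0xfffff80000000000 per the layout comment) has every bit below the 4TB direct map clear, so for any physical address inside the DMAP the OR behaves like addition and the mask like subtraction (the sample address is arbitrary):

    #include <assert.h>
    #include <stdint.h>

    #define DMAP_MIN_ADDRESS 0xfffff80000000000ULL

    int
    main(void)
    {
    	uint64_t pa = 0x12345678000ULL;      /* sample PA inside the 4TB DMAP */
    	uint64_t va = pa | DMAP_MIN_ADDRESS; /* PHYS_TO_DMAP */

    	/* The low 43 bits of DMAP_MIN_ADDRESS are clear, so OR == ADD... */
    	assert(va == DMAP_MIN_ADDRESS + pa);
    	/* ...and DMAP_TO_PHYS is the exact inverse mask. */
    	assert((va & ~DMAP_MIN_ADDRESS) == pa);
    	return (0);
    }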
diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h index 1b9873aede4a..0f3321379b47 100644 --- a/sys/powerpc/include/vmparam.h +++ b/sys/powerpc/include/vmparam.h @@ -1,338 +1,330 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: vmparam.h,v 1.11 2000/02/11 19:25:16 thorpej Exp $ * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ #ifndef LOCORE #include <machine/md_var.h> #endif #define USRSTACK SHAREDPAGE #ifndef MAXTSIZ #define MAXTSIZ (1*1024*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128*1024*1024) /* default data size */ #endif #ifndef MAXDSIZ #ifdef __powerpc64__ #define MAXDSIZ (32UL*1024*1024*1024) /* max data size */ #else #define MAXDSIZ (1*1024*1024*1024) /* max data size */ #endif #endif #ifndef DFLSSIZ #define DFLSSIZ (8*1024*1024) /* default stack size */ #endif #ifndef MAXSSIZ #ifdef __powerpc64__ #define MAXSSIZ (512*1024*1024) /* max stack size */ #else #define MAXSSIZ (64*1024*1024) /* max stack size */ #endif #endif #ifdef AIM #define VM_MAXUSER_ADDRESS32 0xfffff000 #else #define VM_MAXUSER_ADDRESS32 0x7ffff000 #endif /* * Would like to have MAX addresses = 0, but this doesn't (currently) work */ #ifdef __powerpc64__ /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * * kernel map should be able to start at 0xc008000000000000 - * but at least the functional simulator doesn't like it * * 0x0000000000000000 - 0x000fffffffffffff user map * 0xc000000000000000 - 0xc007ffffffffffff direct map * 0xc008000000000000 - 0xc00fffffffffffff kernel map * */ #define VM_MIN_ADDRESS 0x0000000000000000 #define VM_MAXUSER_ADDRESS 0x000fffffc0000000 #define VM_MAX_ADDRESS 0xc00fffffffffffff #define VM_MIN_KERNEL_ADDRESS 0xc008000000000000 #define VM_MAX_KERNEL_ADDRESS 0xc0080007ffffffff #define VM_MAX_SAFE_KERNEL_ADDRESS VM_MAX_KERNEL_ADDRESS #else #define VM_MIN_ADDRESS 0 #define VM_MAXUSER_ADDRESS VM_MAXUSER_ADDRESS32 #define VM_MAX_ADDRESS 0xffffffff #endif #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define FREEBSD32_SHAREDPAGE (VM_MAXUSER_ADDRESS32 - PAGE_SIZE) #define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE #define KERNBASE 0x00100100 /* start of kernel virtual */ #ifdef AIM #ifndef __powerpc64__ #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)KERNEL_SR << ADDR_SR_SHFT) #define VM_MAX_SAFE_KERNEL_ADDRESS (VM_MIN_KERNEL_ADDRESS + 2*SEGMENT_LENGTH -1) #define VM_MAX_KERNEL_ADDRESS (VM_MIN_KERNEL_ADDRESS + 3*SEGMENT_LENGTH - 1) #endif /* * Use the direct-mapped BAT registers for UMA small allocs. This * takes pressure off the small amount of available KVA. */ #define UMA_MD_SMALL_ALLOC #else /* Book-E */ /* Use the direct map for UMA small allocs on powerpc64. */ #ifdef __powerpc64__ #define UMA_MD_SMALL_ALLOC #else #define VM_MIN_KERNEL_ADDRESS 0xc0000000 #define VM_MAX_KERNEL_ADDRESS 0xffffefff #define VM_MAX_SAFE_KERNEL_ADDRESS VM_MAX_KERNEL_ADDRESS #endif #endif /* AIM/E500 */ #if !defined(LOCORE) struct pmap_physseg { struct pv_entry *pvent; char *attrs; }; #endif #ifdef __powerpc64__ #define VM_PHYSSEG_MAX 63 /* 1? */ #else #define VM_PHYSSEG_MAX 16 /* 1? */ #endif #define PHYS_AVAIL_SZ 256 /* Allows up to 16GB RAM on pSeries with * logical memory block size of 64MB. * For more RAM increase the lmb or this value. */ /* XXX This is non-sensical. Phys avail should hold contiguous regions. */ #define PHYS_AVAIL_ENTRIES PHYS_AVAIL_SZ
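For context on the phys_avail sizing discussed above, a standalone sketch of the convention the comments refer to: conventionally the array holds (start, end) physical address pairs terminated by a zeroed entry, so PHYS_AVAIL_SZ bounds how many discontiguous regions the platform can describe (the regions below are hypothetical):

    #include <inttypes.h>
    #include <stdio.h>

    /* Pairs of (start, end) physical addresses, zero-terminated. */
    static uint64_t phys_avail[] = {
    	0x0000000000001000, 0x000000000009f000, /* hypothetical low region */
    	0x0000000000100000, 0x00000000bff00000, /* hypothetical main region */
    	0, 0                                    /* terminator */
    };

    int
    main(void)
    {
    	for (int i = 0; phys_avail[i + 1] != 0; i += 2)
    		printf("region %d: %#" PRIx64 " - %#" PRIx64 "\n",
    		    i / 2, phys_avail[i], phys_avail[i + 1]);
    	return (0);
    }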
/* * The physical address space is densely populated on 32-bit systems, * but may not be on 64-bit ones. */ #ifdef __powerpc64__ #define VM_PHYSSEG_SPARSE #else #define VM_PHYSSEG_DENSE #endif /* * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for small UMA objects are * allocated. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* * Create one free page list. */ #define VM_NFREELIST 1 #define VM_FREELIST_DEFAULT 0 #ifdef __powerpc64__ /* The largest allocation size is 16MB. */ #define VM_NFREEORDER 13 #else /* The largest allocation size is 4MB. */ #define VM_NFREEORDER 11 #endif #ifndef VM_NRESERVLEVEL #ifdef __powerpc64__ /* Enable superpage reservations: 1 level. */ #define VM_NRESERVLEVEL 1 #else /* Disable superpage reservations. */ #define VM_NRESERVLEVEL 0 #endif #endif #ifndef VM_LEVEL_0_ORDER /* Level 0 reservations consist of 512 (RPT) or 4096 (HPT) pages. */ #define VM_LEVEL_0_ORDER vm_level_0_order #ifndef __ASSEMBLER__ extern int vm_level_0_order; #endif #endif #ifndef VM_LEVEL_0_ORDER_MAX #define VM_LEVEL_0_ORDER_MAX 12 #endif #ifdef __powerpc64__ #ifdef SMP #define PA_LOCK_COUNT 256 #endif #endif #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (12 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 40% of the * usable KVA space. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_SAFE_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 2 / 5) #endif #ifdef __powerpc64__ #define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */ #else #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #endif -/* - * Use a fairly large batch size since we expect ppc64 systems to have lots of - * memory. - */ -#ifdef __powerpc64__ -#define VM_BATCHQUEUE_SIZE 63 -#endif - /* * On 32-bit OEA, the only purpose for which sf_buf is used is to implement * an opaque pointer required by the machine-independent parts of the kernel. * That pointer references the vm_page that is "mapped" by the sf_buf. The * actual mapping is provided by the direct virtual-to-physical mapping. * * On OEA64 and Book-E, we need to do something a little more complicated. Use * the runtime-detected hw_direct_map to pick between the two cases. Our * friends in vm_machdep.c will do the same to ensure nothing gets confused. */ #define SFBUF #define SFBUF_NOMD /* * We (usually) have a direct map of all physical memory, so provide * a macro to use to get the kernel VA address for a given PA. Check the * value of PMAP_HAS_DMAP before using. */ #ifndef LOCORE #ifdef __powerpc64__ #define DMAP_BASE_ADDRESS 0xc000000000000000UL #define DMAP_MIN_ADDRESS DMAP_BASE_ADDRESS #define DMAP_MAX_ADDRESS 0xc007ffffffffffffUL #else #define DMAP_BASE_ADDRESS 0x00000000UL #define DMAP_MAX_ADDRESS 0xbfffffffUL #endif #endif #if defined(__powerpc64__) || defined(BOOKE) /* * powerpc64 and Book-E will provide their own page array allocators. * * On AIM, this will allocate a single virtual array, with pages from the * correct memory domains. * On Book-E this will let us put the array in TLB1, removing the need for TLB * thrashing. * * VM_MIN_KERNEL_ADDRESS is just a dummy. It will get set by the MMU driver.
*/ #define PA_MIN_ADDRESS VM_MIN_KERNEL_ADDRESS #define PMAP_HAS_PAGE_ARRAY 1 #endif #if defined(__powerpc64__) /* * Need a page dump array for minidump. */ #define MINIDUMP_PAGE_TRACKING 1 #else /* * No minidump with 32-bit powerpc. */ #define MINIDUMP_PAGE_TRACKING 0 #endif #define PMAP_HAS_DMAP (hw_direct_map) #define PHYS_TO_DMAP(x) ({ \ KASSERT(hw_direct_map, ("Direct map not provided by PMAP")); \ (x) | DMAP_BASE_ADDRESS; }) #define DMAP_TO_PHYS(x) ({ \ KASSERT(hw_direct_map, ("Direct map not provided by PMAP")); \ (x) &~ DMAP_BASE_ADDRESS; }) /* * No non-transparent large page support in the pmap. */ #define PMAP_HAS_LARGEPAGES 0 #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h index 268d53a391db..9624d31a75b7 100644 --- a/sys/vm/vm_pagequeue.h +++ b/sys/vm/vm_pagequeue.h @@ -1,470 +1,472 @@ /*- * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_page.h 8.2 (Berkeley) 12/13/93 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
* * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * * $FreeBSD$ */ #ifndef _VM_PAGEQUEUE_ #define _VM_PAGEQUEUE_ #ifdef _KERNEL struct vm_pagequeue { struct mtx pq_mutex; struct pglist pq_pl; int pq_cnt; const char * const pq_name; uint64_t pq_pdpages; } __aligned(CACHE_LINE_SIZE); -#ifndef VM_BATCHQUEUE_SIZE +#if __SIZEOF_LONG__ == 8 +#define VM_BATCHQUEUE_SIZE 63 +#else #define VM_BATCHQUEUE_SIZE 15 #endif struct vm_batchqueue { vm_page_t bq_pa[VM_BATCHQUEUE_SIZE]; int bq_cnt; } __aligned(CACHE_LINE_SIZE); #include <vm/uma.h> #include <sys/_blockcount.h> #include <sys/pidctrl.h> struct sysctl_oid; /* * One vm_domain per NUMA domain. Contains pagequeues, free page structures, * and accounting. * * Lock Key: * f vmd_free_mtx * p vmd_pageout_mtx * d vm_domainset_lock * a atomic * c const after boot * q page queue lock * * A unique page daemon thread manages each vm_domain structure and is * responsible for ensuring that some free memory is available by freeing * inactive pages and aging active pages. To decide how many pages to process, * it uses thresholds derived from the number of pages in the domain: * * vmd_page_count * --- * | * |-> vmd_inactive_target (~3%) * | - The active queue scan target is given by * | (vmd_inactive_target + vmd_free_target - vmd_free_count). * | * | * |-> vmd_free_target (~2%) * | - Target for page reclamation. * | * |-> vmd_pageout_wakeup_thresh (~1.8%) * | - Threshold for waking up the page daemon. * | * | * |-> vmd_free_min (~0.5%) * | - First low memory threshold. * | - Causes per-CPU caching to be lazily disabled in UMA. * | - vm_wait() sleeps below this threshold. * | * |-> vmd_free_severe (~0.25%) * | - Second low memory threshold. * | - Triggers aggressive UMA reclamation, disables delayed buffer * | writes. * | * |-> vmd_free_reserved (~0.13%) * | - Minimum for VM_ALLOC_NORMAL page allocations. * |-> vmd_pageout_free_min (32 + 2 pages) * | - Minimum for waking a page daemon thread sleeping in vm_wait(). * |-> vmd_interrupt_free_min (2 pages) * | - Minimum for VM_ALLOC_SYSTEM page allocations. * --- * *-- * Free page count regulation: * * The page daemon attempts to ensure that the free page count is above the free * target. It wakes up periodically (every 100ms) to input the current free * page shortage (free_target - free_count) to a PID controller, which in * response outputs the number of pages to attempt to reclaim. The shortage's * current magnitude, rate of change, and cumulative value are together used to * determine the controller's output. The page daemon target thus adapts * dynamically to the system's demand for free pages, resulting in less * burstiness than a simple hysteresis loop. * * When the free page count drops below the wakeup threshold, * vm_domain_allocate() proactively wakes up the page daemon. This helps ensure * that the system responds promptly to a large instantaneous free page * shortage. * * The page daemon also attempts to ensure that some fraction of the system's * memory is present in the inactive (I) and laundry (L) page queues, so that it * can respond promptly to a sudden free page shortage.
In particular, the page * daemon thread aggressively scans active pages so long as the following * condition holds: * * len(I) + len(L) + free_target - free_count < inactive_target * * Otherwise, when the inactive target is met, the page daemon periodically * scans a small portion of the active queue in order to maintain up-to-date * per-page access history. Unreferenced pages in the active queue thus * eventually migrate to the inactive queue. * * The per-domain laundry thread periodically launders dirty pages based on the * number of clean pages freed by the page daemon since the last laundering. If * the page daemon fails to meet its scan target (i.e., the PID controller * output) because of a shortage of clean inactive pages, the laundry thread * attempts to launder enough pages to meet the free page target. * *-- * Page allocation priorities: * * The system defines three page allocation priorities: VM_ALLOC_NORMAL, * VM_ALLOC_SYSTEM and VM_ALLOC_INTERRUPT. An interrupt-priority allocation can * claim any free page. This priority is used in the pmap layer when attempting * to allocate a page for the kernel page tables; in such cases an allocation * failure will usually result in a kernel panic. The system priority is used * for most other kernel memory allocations, for instance by UMA's slab * allocator or the buffer cache. Such allocations will fail if the free count * is below interrupt_free_min. All other allocations occur at the normal * priority, which is typically used for allocation of user pages, for instance * in the page fault handler or when allocating page table pages or pv_entry * structures for user pmaps. Such allocations fail if the free count is below * the free_reserved threshold. * *-- * Free memory shortages: * * The system uses the free_min and free_severe thresholds to apply * back-pressure and give the page daemon a chance to recover. When a page * allocation fails due to a shortage and the allocating thread cannot handle * failure, it may call vm_wait() to sleep until free pages are available. * vm_domain_freecnt_inc() wakes sleeping threads once the free page count rises * above the free_min threshold; the page daemon and laundry threads are given * priority and will wake up once free_count reaches the (much smaller) * pageout_free_min threshold. * * On NUMA systems, the domainset iterators always prefer NUMA domains where the * free page count is above the free_min threshold. This means that given the * choice between two NUMA domains, one above the free_min threshold and one * below, the former will be used to satisfy the allocation request regardless * of the domain selection policy. * * In addition to reclaiming memory from the page queues, the vm_lowmem event * fires every ten seconds so long as the system is under memory pressure (i.e., * vmd_free_count < vmd_free_target). This allows kernel subsystems to register * for notifications of free page shortages, upon which they may shrink their * caches. Following a vm_lowmem event, UMA's caches are pruned to ensure that * they do not contain an excess of unused memory. When a domain is below the * free_min threshold, UMA limits the population of per-CPU caches. When a * domain falls below the free_severe threshold, UMA's caches are completely * drained. * * If the system encounters a global memory shortage, it may resort to the * out-of-memory (OOM) killer, which selects a process and delivers SIGKILL in a * last-ditch attempt to free up some pages. 
Either of the two following * conditions will activate the OOM killer: * * 1. The page daemons collectively fail to reclaim any pages during their * inactive queue scans. After vm_pageout_oom_seq consecutive scans fail, * the page daemon thread votes for an OOM kill, and an OOM kill is * triggered when all page daemons have voted. This heuristic is strict and * may fail to trigger even when the system is effectively deadlocked. * * 2. Threads in the user fault handler are repeatedly unable to make progress * while allocating a page to satisfy the fault. After * vm_pfault_oom_attempts page allocation failures with intervening * vm_wait() calls, the faulting thread will trigger an OOM kill. */ struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; struct mtx_padalign vmd_free_mtx; struct mtx_padalign vmd_pageout_mtx; struct vm_pgcache { int domain; int pool; uma_zone_t zone; } vmd_pgcache[VM_NFREEPOOL]; struct vmem *vmd_kernel_arena; /* (c) per-domain kva R/W arena. */ struct vmem *vmd_kernel_rwx_arena; /* (c) per-domain kva R/W/X arena. */ u_int vmd_domain; /* (c) Domain number. */ u_int vmd_page_count; /* (c) Total page count. */ long vmd_segs; /* (c) bitmask of the segments */ u_int __aligned(CACHE_LINE_SIZE) vmd_free_count; /* (a,f) free page count */ u_int vmd_pageout_deficit; /* (a) Estimated number of pages deficit */ uint8_t vmd_pad[CACHE_LINE_SIZE - (sizeof(u_int) * 2)]; /* Paging control variables, used within single threaded page daemon. */ struct pidctrl vmd_pid; /* Pageout controller. */ boolean_t vmd_oom; u_int vmd_inactive_threads; u_int vmd_inactive_shortage; /* Per-thread shortage. */ blockcount_t vmd_inactive_running; /* Number of inactive threads. */ blockcount_t vmd_inactive_starting; /* Number of threads started. */ volatile u_int vmd_addl_shortage; /* Shortage accumulator. */ volatile u_int vmd_inactive_freed; /* Successful inactive frees. */ volatile u_int vmd_inactive_us; /* Microseconds for above. */ u_int vmd_inactive_pps; /* Exponential decay frees/second. */ int vmd_oom_seq; int vmd_last_active_scan; struct vm_page vmd_markers[PQ_COUNT]; /* (q) markers for queue scans */ struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */ struct vm_page vmd_clock[2]; /* markers for active queue scan */ int vmd_pageout_wanted; /* (a, p) pageout daemon wait channel */ int vmd_pageout_pages_needed; /* (d) page daemon waiting for pages? */ bool vmd_minset; /* (d) Are we in vm_min_domains? */ bool vmd_severeset; /* (d) Are we in vm_severe_domains? */ enum { VM_LAUNDRY_IDLE = 0, VM_LAUNDRY_BACKGROUND, VM_LAUNDRY_SHORTFALL } vmd_laundry_request; /* Paging thresholds and targets. */ u_int vmd_clean_pages_freed; /* (q) accumulator for laundry thread */ u_int vmd_background_launder_target; /* (c) */ u_int vmd_free_reserved; /* (c) pages reserved for deadlock */ u_int vmd_free_target; /* (c) pages desired free */ u_int vmd_free_min; /* (c) pages desired free */ u_int vmd_inactive_target; /* (c) pages desired inactive */ u_int vmd_pageout_free_min; /* (c) min pages reserved for kernel */ u_int vmd_pageout_wakeup_thresh;/* (c) min pages to wake pagedaemon */ u_int vmd_interrupt_free_min; /* (c) reserved pages for int code */ u_int vmd_free_severe; /* (c) severe page depletion point */ /* Name for sysctl etc. 
*/ struct sysctl_oid *vmd_oid; char vmd_name[sizeof(__XSTRING(MAXMEMDOM))]; } __aligned(CACHE_LINE_SIZE); extern struct vm_domain vm_dom[MAXMEMDOM]; #define VM_DOMAIN(n) (&vm_dom[(n)]) #define VM_DOMAIN_EMPTY(n) (vm_dom[(n)].vmd_page_count == 0) #define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED) #define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex) #define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex) #define vm_pagequeue_trylock(pq) mtx_trylock(&(pq)->pq_mutex) #define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex) #define vm_domain_free_assert_locked(n) \ mtx_assert(vm_domain_free_lockptr((n)), MA_OWNED) #define vm_domain_free_assert_unlocked(n) \ mtx_assert(vm_domain_free_lockptr((n)), MA_NOTOWNED) #define vm_domain_free_lock(d) \ mtx_lock(vm_domain_free_lockptr((d))) #define vm_domain_free_lockptr(d) \ (&(d)->vmd_free_mtx) #define vm_domain_free_trylock(d) \ mtx_trylock(vm_domain_free_lockptr((d))) #define vm_domain_free_unlock(d) \ mtx_unlock(vm_domain_free_lockptr((d))) #define vm_domain_pageout_lockptr(d) \ (&(d)->vmd_pageout_mtx) #define vm_domain_pageout_assert_locked(n) \ mtx_assert(vm_domain_pageout_lockptr((n)), MA_OWNED) #define vm_domain_pageout_assert_unlocked(n) \ mtx_assert(vm_domain_pageout_lockptr((n)), MA_NOTOWNED) #define vm_domain_pageout_lock(d) \ mtx_lock(vm_domain_pageout_lockptr((d))) #define vm_domain_pageout_unlock(d) \ mtx_unlock(vm_domain_pageout_lockptr((d))) static __inline void vm_pagequeue_cnt_add(struct vm_pagequeue *pq, int addend) { vm_pagequeue_assert_locked(pq); pq->pq_cnt += addend; } #define vm_pagequeue_cnt_inc(pq) vm_pagequeue_cnt_add((pq), 1) #define vm_pagequeue_cnt_dec(pq) vm_pagequeue_cnt_add((pq), -1) static inline void vm_pagequeue_remove(struct vm_pagequeue *pq, vm_page_t m) { TAILQ_REMOVE(&pq->pq_pl, m, plinks.q); vm_pagequeue_cnt_dec(pq); } static inline void vm_batchqueue_init(struct vm_batchqueue *bq) { bq->bq_cnt = 0; } static inline int vm_batchqueue_insert(struct vm_batchqueue *bq, vm_page_t m) { int slots_free; slots_free = nitems(bq->bq_pa) - bq->bq_cnt; if (slots_free > 0) { bq->bq_pa[bq->bq_cnt++] = m; return (slots_free); } return (slots_free); } static inline vm_page_t vm_batchqueue_pop(struct vm_batchqueue *bq) { if (bq->bq_cnt == 0) return (NULL); return (bq->bq_pa[--bq->bq_cnt]); } void vm_domain_set(struct vm_domain *vmd); void vm_domain_clear(struct vm_domain *vmd); int vm_domain_allocate(struct vm_domain *vmd, int req, int npages); /* * vm_pagequeue_domain: * * Return the memory domain the page belongs to. */ static inline struct vm_domain * vm_pagequeue_domain(vm_page_t m) { return (VM_DOMAIN(vm_page_domain(m))); } /* * Return the number of pages we need to free-up or cache * A positive number indicates that we do not have enough free pages. */ static inline int vm_paging_target(struct vm_domain *vmd) { return (vmd->vmd_free_target - vmd->vmd_free_count); } /* * Returns TRUE if the pagedaemon needs to be woken up. */ static inline int vm_paging_needed(struct vm_domain *vmd, u_int free_count) { return (free_count < vmd->vmd_pageout_wakeup_thresh); } /* * Returns TRUE if the domain is below the min paging target. */ static inline int vm_paging_min(struct vm_domain *vmd) { return (vmd->vmd_free_min > vmd->vmd_free_count); } /* * Returns TRUE if the domain is below the severe paging target. */ static inline int vm_paging_severe(struct vm_domain *vmd) { return (vmd->vmd_free_severe > vmd->vmd_free_count); } /* * Return the number of pages we need to launder. 
* A positive number indicates that we have a shortfall of clean pages. */ static inline int vm_laundry_target(struct vm_domain *vmd) { return (vm_paging_target(vmd)); } void pagedaemon_wakeup(int domain); static inline void vm_domain_freecnt_inc(struct vm_domain *vmd, int adj) { u_int old, new; old = atomic_fetchadd_int(&vmd->vmd_free_count, adj); new = old + adj; /* * Only update bitsets on transitions. Notice we short-circuit the * rest of the checks if we're above min already. */ if (old < vmd->vmd_free_min && (new >= vmd->vmd_free_min || (old < vmd->vmd_free_severe && new >= vmd->vmd_free_severe) || (old < vmd->vmd_pageout_free_min && new >= vmd->vmd_pageout_free_min))) vm_domain_clear(vmd); } #endif /* _KERNEL */ #endif /* !_VM_PAGEQUEUE_ */
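Review note on the VM_BATCHQUEUE_SIZE hunk above: with 8-byte pointers, 63 entries plus the 4-byte bq_cnt pack the cache-line-aligned struct vm_batchqueue into exactly 512 bytes (eight 64-byte lines), while 15 entries with 4-byte pointers fill a single line on 32-bit platforms, hence keying the size to __SIZEOF_LONG__ instead of per-arch overrides. A standalone sketch of that arithmetic (a 64-byte cache line and the GCC/Clang aligned attribute are assumed; struct page is a stand-in for the kernel's vm_page):

    #define CACHE_LINE_SIZE	64	/* assumed line size */

    struct page;			/* stand-in for struct vm_page */
    typedef struct page *vm_page_t;

    #if __SIZEOF_LONG__ == 8
    #define VM_BATCHQUEUE_SIZE	63
    #else
    #define VM_BATCHQUEUE_SIZE	15
    #endif

    struct vm_batchqueue {
    	vm_page_t bq_pa[VM_BATCHQUEUE_SIZE];
    	int bq_cnt;
    } __attribute__((aligned(CACHE_LINE_SIZE)));

    #if __SIZEOF_LONG__ == 8
    /* 63 * 8 + 4 = 508, padded to 512: eight full lines. */
    _Static_assert(sizeof(struct vm_batchqueue) == 8 * CACHE_LINE_SIZE,
        "vm_batchqueue should fill eight cache lines on LP64");
    #else
    /* 15 * 4 + 4 = 64: exactly one line. */
    _Static_assert(sizeof(struct vm_batchqueue) == CACHE_LINE_SIZE,
        "vm_batchqueue should fill one cache line on ILP32");
    #endif

Note also that vm_batchqueue_insert() above returns the number of slots that were free before the insertion, so callers treat zero as "queue full" and any nonzero value as success.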