Index: head/sys/amd64/include/vmparam.h =================================================================== --- head/sys/amd64/include/vmparam.h (revision 276438) +++ head/sys/amd64/include/vmparam.h (revision 276439) @@ -1,220 +1,228 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ 1 /* * Machine dependent constants for AMD64. */ /* * Virtual memory related constants, all in bytes */ #define MAXTSIZ (128UL*1024*1024) /* max text size */ #ifndef DFLDSIZ #define DFLDSIZ (32768UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (32768UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (512UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * We provide a machine specific single page allocator through the use * of the direct mapped segment. This uses 2MB pages for reduced * TLB pressure. */ #define UMA_MD_SMALL_ALLOC /* * The physical address space is densely populated. */ #define VM_PHYSSEG_DENSE /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries. */ #define VM_PHYSSEG_MAX 63 /* * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for page tables and small UMA * objects are allocated. */ #define VM_NFREEPOOL 3 #define VM_FREEPOOL_CACHE 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* - * Create two free page lists: VM_FREELIST_DEFAULT is for physical - * pages that are above the largest physical address that is - * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages - * that are below that address. + * Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages + * that have physical addresses below 4G but are not accessible by ISA DMA, + * and VM_FREELIST_ISADMA is for physical pages that are accessible by ISA + * DMA. */ -#define VM_NFREELIST 2 +#define VM_NFREELIST 3 #define VM_FREELIST_DEFAULT 0 -#define VM_FREELIST_ISADMA 1 +#define VM_FREELIST_DMA32 1 +#define VM_FREELIST_ISADMA 2 + +/* + * Create the DMA32 free list only if the number of physical pages above + * physical address 4G is at least 16M, which amounts to 64GB of physical + * memory. + */ +#define VM_DMA32_NPAGES_THRESHOLD 16777216 /* * An allocation size of 16MB is supported in order to optimize the * use of the direct map by UMA. Specifically, a cache line contains * at most 8 PDEs, collectively mapping 16MB of physical memory. By * reducing the number of distinct 16MB "pages" that are used by UMA, * the physical memory allocator reduces the likelihood of both 2MB * page TLB misses and cache misses caused by 2MB page TLB misses. */ #define VM_NFREEORDER 13 /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages. */ #ifndef VM_LEVEL_0_ORDER #define VM_LEVEL_0_ORDER 9 #endif #ifdef SMP #define PA_LOCK_COUNT 256 #endif /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) * 0xffff804020101000 - 0xfffff7ffffffffff unused * 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map * 0xfffffc0000000000 - 0xfffffdffffffffff unused * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map * * Within the kernel map: * * 0xffffffff80000000 KERNBASE */ #define VM_MIN_KERNEL_ADDRESS KVADDR(KPML4BASE, 0, 0, 0) #define VM_MAX_KERNEL_ADDRESS KVADDR(KPML4BASE + NKPML4E - 1, \ NPDPEPG-1, NPDEPG-1, NPTEPG-1) #define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0) #define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0) #define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0) #define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I) #define UPT_MIN_ADDRESS KVADDR(PML4PML4I, 0, 0, 0) #define VM_MAXUSER_ADDRESS UVADDR(NUPML4E, 0, 0, 0) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE #define VM_MAX_ADDRESS UPT_MAX_ADDRESS #define VM_MIN_ADDRESS (0) /* * XXX Allowing dmaplimit == 0 is a temporary workaround for vt(4) efifb's * early use of PHYS_TO_DMAP before the mapping is actually setup. This works * because the result is not actually accessed until later, but the early * vt fb startup needs to be reworked. */ #define PHYS_TO_DMAP(x) ({ \ KASSERT(dmaplimit == 0 || (x) < dmaplimit, \ ("physical address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ (x) | DMAP_MIN_ADDRESS; }) #define DMAP_TO_PHYS(x) ({ \ KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) && \ (x) >= DMAP_MIN_ADDRESS, \ ("virtual address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ (x) & ~DMAP_MIN_ADDRESS; }) /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (1) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) #endif /* initial pagein size of beginning of executable file */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #define ZERO_REGION_SIZE (2 * 1024 * 1024) /* 2MB */ #endif /* _MACHINE_VMPARAM_H_ */ Index: head/sys/mips/include/vmparam.h =================================================================== --- head/sys/mips/include/vmparam.h (revision 276438) +++ head/sys/mips/include/vmparam.h (revision 276439) @@ -1,195 +1,193 @@ /* $OpenBSD: vmparam.h,v 1.2 1998/09/15 10:50:12 pefo Exp $ */ /* $NetBSD: vmparam.h,v 1.5 1994/10/26 21:10:10 cgd Exp $ */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: vmparam.h 1.16 91/01/18 * @(#)vmparam.h 8.2 (Berkeley) 4/22/94 * JNPR: vmparam.h,v 1.3.2.1 2007/09/10 06:01:28 girish * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ /* * Machine dependent constants mips processors. */ /* * Virtual memory related constants, all in bytes */ #ifndef MAXTSIZ #define MAXTSIZ (128UL*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128UL*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (1*1024UL*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (8UL*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (64UL*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128UL*1024) /* amount to grow stack */ #endif /* * Mach derived constants */ /* user/kernel map constants */ #define VM_MIN_ADDRESS ((vm_offset_t)0x00000000) #define VM_MAX_ADDRESS ((vm_offset_t)(intptr_t)(int32_t)0xffffffff) #define VM_MINUSER_ADDRESS ((vm_offset_t)0x00000000) #ifdef __mips_n64 #define VM_MAXUSER_ADDRESS (VM_MINUSER_ADDRESS + (NPDEPG * NBSEG)) #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)0xc000000000000000) #define VM_MAX_KERNEL_ADDRESS (VM_MIN_KERNEL_ADDRESS + (NPDEPG * NBSEG)) #else #define VM_MAXUSER_ADDRESS ((vm_offset_t)0x80000000) #define VM_MIN_KERNEL_ADDRESS ((vm_offset_t)0xC0000000) #define VM_MAX_KERNEL_ADDRESS ((vm_offset_t)0xFFFFC000) #endif #define KERNBASE ((vm_offset_t)(intptr_t)(int32_t)0x80000000) /* * USRSTACK needs to start a little below 0x8000000 because the R8000 * and some QED CPUs perform some virtual address checks before the * offset is calculated. */ #define USRSTACK (VM_MAXUSER_ADDRESS - PAGE_SIZE) #ifdef __mips_n64 #define FREEBSD32_USRSTACK (((vm_offset_t)0x80000000) - PAGE_SIZE) #endif /* * Disable superpage reservations. (not sure if this is right * I copied it from ARM) */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 0 #endif /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (12 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 40% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 2 / 5) #endif /* initial pagein size of beginning of executable file */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif #define UMA_MD_SMALL_ALLOC /* * max number of non-contig chunks of physical RAM you can have */ #define VM_PHYSSEG_MAX 32 /* * The physical address space is sparsely populated. */ #define VM_PHYSSEG_SPARSE /* * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for small UMA objects are * allocated. */ #define VM_NFREEPOOL 3 #define VM_FREEPOOL_CACHE 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* - * we support 2 free lists: - * - * - DEFAULT for direct mapped (KSEG0) pages. - * Note: This usage of DEFAULT may be misleading because we use - * DEFAULT for allocating direct mapped pages. The normal page - * allocations use HIGHMEM if available, and then DEFAULT. - * - HIGHMEM for other pages + * Create up to two free lists on !__mips_n64: VM_FREELIST_DEFAULT is for + * physical pages that are above the largest physical address that is + * accessible through the direct map (KSEG0) and VM_FREELIST_LOWMEM is for + * physical pages that are below that address. VM_LOWMEM_BOUNDARY is the + * physical address for the end of the direct map (KSEG0). */ #ifdef __mips_n64 #define VM_NFREELIST 1 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_DIRECT VM_FREELIST_DEFAULT #else #define VM_NFREELIST 2 -#define VM_FREELIST_DEFAULT 1 -#define VM_FREELIST_HIGHMEM 0 -#define VM_FREELIST_DIRECT VM_FREELIST_DEFAULT -#define VM_HIGHMEM_ADDRESS ((vm_paddr_t)0x20000000) +#define VM_FREELIST_DEFAULT 0 +#define VM_FREELIST_LOWMEM 1 +#define VM_FREELIST_DIRECT VM_FREELIST_LOWMEM +#define VM_LOWMEM_BOUNDARY ((vm_paddr_t)0x20000000) #endif /* * The largest allocation size is 1MB. */ #define VM_NFREEORDER 9 #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #ifndef __mips_n64 #define SFBUF #define SFBUF_MAP #endif #endif /* !_MACHINE_VMPARAM_H_ */ Index: head/sys/vm/vm_phys.c =================================================================== --- head/sys/vm/vm_phys.c (revision 276438) +++ head/sys/vm/vm_phys.c (revision 276439) @@ -1,1138 +1,1251 @@ /*- * Copyright (c) 2002-2006 Rice University * Copyright (c) 2007 Alan L. Cox * All rights reserved. * * This software was developed for the FreeBSD Project by Alan L. Cox, * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Physical memory system implementation * * Any external functions defined by this module are only to be used by the * virtual memory system. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_vm.h" #include #include #include #include #include #include #if MAXMEMDOM > 1 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include _Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX, "Too many physsegs."); struct mem_affinity *mem_affinity; int vm_ndomains = 1; struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX]; int vm_phys_nsegs; struct vm_phys_fictitious_seg; static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *, struct vm_phys_fictitious_seg *); RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree = RB_INITIALIZER(_vm_phys_fictitious_tree); struct vm_phys_fictitious_seg { RB_ENTRY(vm_phys_fictitious_seg) node; /* Memory region data */ vm_paddr_t start; vm_paddr_t end; vm_page_t first_page; }; RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node, vm_phys_fictitious_cmp); static struct rwlock vm_phys_fictitious_reg_lock; MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages"); static struct vm_freelist vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER]; -static int vm_nfreelists = VM_FREELIST_DEFAULT + 1; +static int vm_nfreelists; +/* + * Provides the mapping from VM_FREELIST_* to free list indices (flind). + */ +static int vm_freelist_to_flind[VM_NFREELIST]; + +CTASSERT(VM_FREELIST_DEFAULT == 0); + +#ifdef VM_FREELIST_ISADMA +#define VM_ISADMA_BOUNDARY 16777216 +#endif +#ifdef VM_FREELIST_DMA32 +#define VM_DMA32_BOUNDARY ((vm_paddr_t)1 << 32) +#endif + +/* + * Enforce the assumptions made by vm_phys_add_seg() and vm_phys_init() about + * the ordering of the free list boundaries. + */ +#if defined(VM_ISADMA_BOUNDARY) && defined(VM_LOWMEM_BOUNDARY) +CTASSERT(VM_ISADMA_BOUNDARY < VM_LOWMEM_BOUNDARY); +#endif +#if defined(VM_LOWMEM_BOUNDARY) && defined(VM_DMA32_BOUNDARY) +CTASSERT(VM_LOWMEM_BOUNDARY < VM_DMA32_BOUNDARY); +#endif + static int cnt_prezero; SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "The number of physical pages prezeroed at idle time"); static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD, &vm_ndomains, 0, "Number of physical memory domains available."); static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order); -static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, - int domain); -static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind); +static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); +static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); static int vm_phys_paddr_to_segind(vm_paddr_t pa); static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order); /* * Red-black tree helpers for vm fictitious range management. */ static inline int vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p, struct vm_phys_fictitious_seg *range) { KASSERT(range->start != 0 && range->end != 0, ("Invalid range passed on search for vm_fictitious page")); if (p->start >= range->end) return (1); if (p->start < range->start) return (-1); return (0); } static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1, struct vm_phys_fictitious_seg *p2) { /* Check if this is a search for a page */ if (p1->end == 0) return (vm_phys_fictitious_in_range(p1, p2)); KASSERT(p2->end != 0, ("Invalid range passed as second parameter to vm fictitious comparison")); /* Searching to add a new range */ if (p1->end <= p2->start) return (-1); if (p1->start >= p2->end) return (1); panic("Trying to add overlapping vm fictitious ranges:\n" "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start, (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end); } static __inline int vm_rr_selectdomain(void) { #if MAXMEMDOM > 1 struct thread *td; td = curthread; td->td_dom_rr_idx++; td->td_dom_rr_idx %= vm_ndomains; return (td->td_dom_rr_idx); #else return (0); #endif } boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high) { struct vm_phys_seg *s; int idx; while ((idx = ffsl(mask)) != 0) { idx--; /* ffsl counts from 1 */ mask &= ~(1UL << idx); s = &vm_phys_segs[idx]; if (low < s->end && high > s->start) return (TRUE); } return (FALSE); } /* * Outputs the state of the physical memory allocator, specifically, * the amount of physical memory in each free list. */ static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct vm_freelist *fl; int dom, error, flind, oind, pind; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req); for (dom = 0; dom < vm_ndomains; dom++) { sbuf_printf(&sbuf,"\nDOMAIN %d:\n", dom); for (flind = 0; flind < vm_nfreelists; flind++) { sbuf_printf(&sbuf, "\nFREE LIST %d:\n" "\n ORDER (SIZE) | NUMBER" "\n ", flind); for (pind = 0; pind < VM_NFREEPOOL; pind++) sbuf_printf(&sbuf, " | POOL %d", pind); sbuf_printf(&sbuf, "\n-- "); for (pind = 0; pind < VM_NFREEPOOL; pind++) sbuf_printf(&sbuf, "-- -- "); sbuf_printf(&sbuf, "--\n"); for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { sbuf_printf(&sbuf, " %2d (%6dK)", oind, 1 << (PAGE_SHIFT - 10 + oind)); for (pind = 0; pind < VM_NFREEPOOL; pind++) { fl = vm_phys_free_queues[dom][flind][pind]; sbuf_printf(&sbuf, " | %6d", fl[oind].lcnt); } sbuf_printf(&sbuf, "\n"); } } } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } /* * Outputs the set of physical memory segments. */ static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct vm_phys_seg *seg; int error, segind; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); for (segind = 0; segind < vm_phys_nsegs; segind++) { sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); seg = &vm_phys_segs[segind]; sbuf_printf(&sbuf, "start: %#jx\n", (uintmax_t)seg->start); sbuf_printf(&sbuf, "end: %#jx\n", (uintmax_t)seg->end); sbuf_printf(&sbuf, "domain: %d\n", seg->domain); sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); } error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } static void vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail) { m->order = order; if (tail) TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q); else TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q); fl[order].lcnt++; } static void vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order) { TAILQ_REMOVE(&fl[order].pl, m, plinks.q); fl[order].lcnt--; m->order = VM_NFREEORDER; } /* * Create a physical memory segment. */ static void -_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain) +_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain) { struct vm_phys_seg *seg; KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); KASSERT(domain < vm_ndomains, ("vm_phys_create_seg: invalid domain provided")); seg = &vm_phys_segs[vm_phys_nsegs++]; while (seg > vm_phys_segs && (seg - 1)->start >= end) { *seg = *(seg - 1); seg--; } seg->start = start; seg->end = end; seg->domain = domain; - seg->free_queues = &vm_phys_free_queues[domain][flind]; } static void -vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind) +vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end) { int i; if (mem_affinity == NULL) { - _vm_phys_create_seg(start, end, flind, 0); + _vm_phys_create_seg(start, end, 0); return; } for (i = 0;; i++) { if (mem_affinity[i].end == 0) panic("Reached end of affinity info"); if (mem_affinity[i].end <= start) continue; if (mem_affinity[i].start > start) panic("No affinity info for start %jx", (uintmax_t)start); if (mem_affinity[i].end >= end) { - _vm_phys_create_seg(start, end, flind, + _vm_phys_create_seg(start, end, mem_affinity[i].domain); break; } - _vm_phys_create_seg(start, mem_affinity[i].end, flind, + _vm_phys_create_seg(start, mem_affinity[i].end, mem_affinity[i].domain); start = mem_affinity[i].end; } } /* * Add a physical memory segment. */ void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end) { + vm_paddr_t paddr; KASSERT((start & PAGE_MASK) == 0, ("vm_phys_define_seg: start is not page aligned")); KASSERT((end & PAGE_MASK) == 0, ("vm_phys_define_seg: end is not page aligned")); + + /* + * Split the physical memory segment if it spans two or more free + * list boundaries. + */ + paddr = start; #ifdef VM_FREELIST_ISADMA - if (start < 16777216) { - if (end > 16777216) { - vm_phys_create_seg(start, 16777216, - VM_FREELIST_ISADMA); - vm_phys_create_seg(16777216, end, VM_FREELIST_DEFAULT); - } else - vm_phys_create_seg(start, end, VM_FREELIST_ISADMA); - if (VM_FREELIST_ISADMA >= vm_nfreelists) - vm_nfreelists = VM_FREELIST_ISADMA + 1; - } else + if (paddr < VM_ISADMA_BOUNDARY && end > VM_ISADMA_BOUNDARY) { + vm_phys_create_seg(paddr, VM_ISADMA_BOUNDARY); + paddr = VM_ISADMA_BOUNDARY; + } #endif -#ifdef VM_FREELIST_HIGHMEM - if (end > VM_HIGHMEM_ADDRESS) { - if (start < VM_HIGHMEM_ADDRESS) { - vm_phys_create_seg(start, VM_HIGHMEM_ADDRESS, - VM_FREELIST_DEFAULT); - vm_phys_create_seg(VM_HIGHMEM_ADDRESS, end, - VM_FREELIST_HIGHMEM); - } else - vm_phys_create_seg(start, end, VM_FREELIST_HIGHMEM); - if (VM_FREELIST_HIGHMEM >= vm_nfreelists) - vm_nfreelists = VM_FREELIST_HIGHMEM + 1; - } else +#ifdef VM_FREELIST_LOWMEM + if (paddr < VM_LOWMEM_BOUNDARY && end > VM_LOWMEM_BOUNDARY) { + vm_phys_create_seg(paddr, VM_LOWMEM_BOUNDARY); + paddr = VM_LOWMEM_BOUNDARY; + } #endif - vm_phys_create_seg(start, end, VM_FREELIST_DEFAULT); +#ifdef VM_FREELIST_DMA32 + if (paddr < VM_DMA32_BOUNDARY && end > VM_DMA32_BOUNDARY) { + vm_phys_create_seg(paddr, VM_DMA32_BOUNDARY); + paddr = VM_DMA32_BOUNDARY; + } +#endif + vm_phys_create_seg(paddr, end); } /* * Initialize the physical memory allocator. + * + * Requires that vm_page_array is initialized! */ void vm_phys_init(void) { struct vm_freelist *fl; struct vm_phys_seg *seg; -#ifdef VM_PHYSSEG_SPARSE - long pages; + u_long npages; + int dom, flind, freelist, oind, pind, segind; + + /* + * Compute the number of free lists, and generate the mapping from the + * manifest constants VM_FREELIST_* to the free list indices. + * + * Initially, the entries of vm_freelist_to_flind[] are set to either + * 0 or 1 to indicate which free lists should be created. + */ + npages = 0; + for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) { + seg = &vm_phys_segs[segind]; +#ifdef VM_FREELIST_ISADMA + if (seg->end <= VM_ISADMA_BOUNDARY) + vm_freelist_to_flind[VM_FREELIST_ISADMA] = 1; + else #endif - int dom, flind, oind, pind, segind; +#ifdef VM_FREELIST_LOWMEM + if (seg->end <= VM_LOWMEM_BOUNDARY) + vm_freelist_to_flind[VM_FREELIST_LOWMEM] = 1; + else +#endif +#ifdef VM_FREELIST_DMA32 + if ( +#ifdef VM_DMA32_NPAGES_THRESHOLD + /* + * Create the DMA32 free list only if the amount of + * physical memory above physical address 4G exceeds the + * given threshold. + */ + npages > VM_DMA32_NPAGES_THRESHOLD && +#endif + seg->end <= VM_DMA32_BOUNDARY) + vm_freelist_to_flind[VM_FREELIST_DMA32] = 1; + else +#endif + { + npages += atop(seg->end - seg->start); + vm_freelist_to_flind[VM_FREELIST_DEFAULT] = 1; + } + } + /* Change each entry into a running total of the free lists. */ + for (freelist = 1; freelist < VM_NFREELIST; freelist++) { + vm_freelist_to_flind[freelist] += + vm_freelist_to_flind[freelist - 1]; + } + vm_nfreelists = vm_freelist_to_flind[VM_NFREELIST - 1]; + KASSERT(vm_nfreelists > 0, ("vm_phys_init: no free lists")); + /* Change each entry into a free list index. */ + for (freelist = 0; freelist < VM_NFREELIST; freelist++) + vm_freelist_to_flind[freelist]--; + /* + * Initialize the first_page and free_queues fields of each physical + * memory segment. + */ #ifdef VM_PHYSSEG_SPARSE - pages = 0; + npages = 0; #endif for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; #ifdef VM_PHYSSEG_SPARSE - seg->first_page = &vm_page_array[pages]; - pages += atop(seg->end - seg->start); + seg->first_page = &vm_page_array[npages]; + npages += atop(seg->end - seg->start); #else seg->first_page = PHYS_TO_VM_PAGE(seg->start); #endif +#ifdef VM_FREELIST_ISADMA + if (seg->end <= VM_ISADMA_BOUNDARY) { + flind = vm_freelist_to_flind[VM_FREELIST_ISADMA]; + KASSERT(flind >= 0, + ("vm_phys_init: ISADMA flind < 0")); + } else +#endif +#ifdef VM_FREELIST_LOWMEM + if (seg->end <= VM_LOWMEM_BOUNDARY) { + flind = vm_freelist_to_flind[VM_FREELIST_LOWMEM]; + KASSERT(flind >= 0, + ("vm_phys_init: LOWMEM flind < 0")); + } else +#endif +#ifdef VM_FREELIST_DMA32 + if (seg->end <= VM_DMA32_BOUNDARY) { + flind = vm_freelist_to_flind[VM_FREELIST_DMA32]; + KASSERT(flind >= 0, + ("vm_phys_init: DMA32 flind < 0")); + } else +#endif + { + flind = vm_freelist_to_flind[VM_FREELIST_DEFAULT]; + KASSERT(flind >= 0, + ("vm_phys_init: DEFAULT flind < 0")); + } + seg->free_queues = &vm_phys_free_queues[seg->domain][flind]; } + + /* + * Initialize the free queues. + */ for (dom = 0; dom < vm_ndomains; dom++) { for (flind = 0; flind < vm_nfreelists; flind++) { for (pind = 0; pind < VM_NFREEPOOL; pind++) { fl = vm_phys_free_queues[dom][flind][pind]; for (oind = 0; oind < VM_NFREEORDER; oind++) TAILQ_INIT(&fl[oind].pl); } } } + rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); } /* * Split a contiguous, power of two-sized set of physical pages. */ static __inline void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) { vm_page_t m_buddy; while (oind > order) { oind--; m_buddy = &m[1 << oind]; KASSERT(m_buddy->order == VM_NFREEORDER, ("vm_phys_split_pages: page %p has unexpected order %d", m_buddy, m_buddy->order)); vm_freelist_add(fl, m_buddy, oind, 0); } } /* * Initialize a physical page and add it to the free lists. */ void vm_phys_add_page(vm_paddr_t pa) { vm_page_t m; struct vm_domain *vmd; vm_cnt.v_page_count++; m = vm_phys_paddr_to_vm_page(pa); m->phys_addr = pa; m->queue = PQ_NONE; m->segind = vm_phys_paddr_to_segind(pa); vmd = vm_phys_domain(m); vmd->vmd_page_count++; vmd->vmd_segs |= 1UL << m->segind; KASSERT(m->order == VM_NFREEORDER, ("vm_phys_add_page: page %p has unexpected order %d", m, m->order)); m->pool = VM_FREEPOOL_DEFAULT; pmap_page_init(m); mtx_lock(&vm_page_queue_free_mtx); vm_phys_freecnt_adj(m, 1); vm_phys_free_pages(m, 0); mtx_unlock(&vm_page_queue_free_mtx); } /* * Allocate a contiguous, power of two-sized set of physical pages * from the free lists. * * The free page queues must be locked. */ vm_page_t vm_phys_alloc_pages(int pool, int order) { vm_page_t m; int dom, domain, flind; KASSERT(pool < VM_NFREEPOOL, ("vm_phys_alloc_pages: pool %d is out of range", pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_alloc_pages: order %d is out of range", order)); for (dom = 0; dom < vm_ndomains; dom++) { domain = vm_rr_selectdomain(); for (flind = 0; flind < vm_nfreelists; flind++) { m = vm_phys_alloc_domain_pages(domain, flind, pool, order); if (m != NULL) return (m); } } return (NULL); } /* - * Find and dequeue a free page on the given free list, with the - * specified pool and order + * Allocate a contiguous, power of two-sized set of physical pages from the + * specified free list. The free list must be specified using one of the + * manifest constants VM_FREELIST_*. + * + * The free page queues must be locked. */ vm_page_t -vm_phys_alloc_freelist_pages(int flind, int pool, int order) +vm_phys_alloc_freelist_pages(int freelist, int pool, int order) { vm_page_t m; int dom, domain; - KASSERT(flind < VM_NFREELIST, - ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind)); + KASSERT(freelist < VM_NFREELIST, + ("vm_phys_alloc_freelist_pages: freelist %d is out of range", + freelist)); KASSERT(pool < VM_NFREEPOOL, ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); - for (dom = 0; dom < vm_ndomains; dom++) { domain = vm_rr_selectdomain(); - m = vm_phys_alloc_domain_pages(domain, flind, pool, order); + m = vm_phys_alloc_domain_pages(domain, + vm_freelist_to_flind[freelist], pool, order); if (m != NULL) return (m); } return (NULL); } static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order) { struct vm_freelist *fl; struct vm_freelist *alt; int oind, pind; vm_page_t m; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); fl = &vm_phys_free_queues[domain][flind][pool][0]; for (oind = order; oind < VM_NFREEORDER; oind++) { m = TAILQ_FIRST(&fl[oind].pl); if (m != NULL) { vm_freelist_rem(fl, m, oind); vm_phys_split_pages(m, oind, fl, order); return (m); } } /* * The given pool was empty. Find the largest * contiguous, power-of-two-sized set of pages in any * pool. Transfer these pages to the given pool, and * use them to satisfy the allocation. */ for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { for (pind = 0; pind < VM_NFREEPOOL; pind++) { alt = &vm_phys_free_queues[domain][flind][pind][0]; m = TAILQ_FIRST(&alt[oind].pl); if (m != NULL) { vm_freelist_rem(alt, m, oind); vm_phys_set_pool(pool, m, oind); vm_phys_split_pages(m, oind, fl, order); return (m); } } } return (NULL); } /* * Find the vm_page corresponding to the given physical address. */ vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa) { struct vm_phys_seg *seg; int segind; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; if (pa >= seg->start && pa < seg->end) return (&seg->first_page[atop(pa - seg->start)]); } return (NULL); } vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa) { struct vm_phys_fictitious_seg tmp, *seg; vm_page_t m; m = NULL; tmp.start = pa; tmp.end = 0; rw_rlock(&vm_phys_fictitious_reg_lock); seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); rw_runlock(&vm_phys_fictitious_reg_lock); if (seg == NULL) return (NULL); m = &seg->first_page[atop(pa - seg->start)]; KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); return (m); } static inline void vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, long page_count, vm_memattr_t memattr) { long i; for (i = 0; i < page_count; i++) { vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); range[i].oflags &= ~VPO_UNMANAGED; range[i].busy_lock = VPB_UNBUSIED; } } int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, vm_memattr_t memattr) { struct vm_phys_fictitious_seg *seg; vm_page_t fp; long page_count; #ifdef VM_PHYSSEG_DENSE long pi, pe; long dpage_count; #endif KASSERT(start < end, ("Start of segment isn't less than end (start: %jx end: %jx)", (uintmax_t)start, (uintmax_t)end)); page_count = (end - start) / PAGE_SIZE; #ifdef VM_PHYSSEG_DENSE pi = atop(start); pe = atop(end); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { fp = &vm_page_array[pi - first_page]; if ((pe - first_page) > vm_page_array_size) { /* * We have a segment that starts inside * of vm_page_array, but ends outside of it. * * Use vm_page_array pages for those that are * inside of the vm_page_array range, and * allocate the remaining ones. */ dpage_count = vm_page_array_size - (pi - first_page); vm_phys_fictitious_init_range(fp, start, dpage_count, memattr); page_count -= dpage_count; start += ptoa(dpage_count); goto alloc; } /* * We can allocate the full range from vm_page_array, * so there's no need to register the range in the tree. */ vm_phys_fictitious_init_range(fp, start, page_count, memattr); return (0); } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { /* * We have a segment that ends inside of vm_page_array, * but starts outside of it. */ fp = &vm_page_array[0]; dpage_count = pe - first_page; vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count, memattr); end -= ptoa(dpage_count); page_count -= dpage_count; goto alloc; } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { /* * Trying to register a fictitious range that expands before * and after vm_page_array. */ return (EINVAL); } else { alloc: #endif fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, M_WAITOK | M_ZERO); #ifdef VM_PHYSSEG_DENSE } #endif vm_phys_fictitious_init_range(fp, start, page_count, memattr); seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO); seg->start = start; seg->end = end; seg->first_page = fp; rw_wlock(&vm_phys_fictitious_reg_lock); RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg); rw_wunlock(&vm_phys_fictitious_reg_lock); return (0); } void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) { struct vm_phys_fictitious_seg *seg, tmp; #ifdef VM_PHYSSEG_DENSE long pi, pe; #endif KASSERT(start < end, ("Start of segment isn't less than end (start: %jx end: %jx)", (uintmax_t)start, (uintmax_t)end)); #ifdef VM_PHYSSEG_DENSE pi = atop(start); pe = atop(end); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { if ((pe - first_page) <= vm_page_array_size) { /* * This segment was allocated using vm_page_array * only, there's nothing to do since those pages * were never added to the tree. */ return; } /* * We have a segment that starts inside * of vm_page_array, but ends outside of it. * * Calculate how many pages were added to the * tree and free them. */ start = ptoa(first_page + vm_page_array_size); } else if (pe > first_page && (pe - first_page) < vm_page_array_size) { /* * We have a segment that ends inside of vm_page_array, * but starts outside of it. */ end = ptoa(first_page); } else if (pi < first_page && pe > (first_page + vm_page_array_size)) { /* Since it's not possible to register such a range, panic. */ panic( "Unregistering not registered fictitious range [%#jx:%#jx]", (uintmax_t)start, (uintmax_t)end); } #endif tmp.start = start; tmp.end = 0; rw_wlock(&vm_phys_fictitious_reg_lock); seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp); if (seg->start != start || seg->end != end) { rw_wunlock(&vm_phys_fictitious_reg_lock); panic( "Unregistering not registered fictitious range [%#jx:%#jx]", (uintmax_t)start, (uintmax_t)end); } RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg); rw_wunlock(&vm_phys_fictitious_reg_lock); free(seg->first_page, M_FICT_PAGES); free(seg, M_FICT_PAGES); } /* * Find the segment containing the given physical address. */ static int vm_phys_paddr_to_segind(vm_paddr_t pa) { struct vm_phys_seg *seg; int segind; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; if (pa >= seg->start && pa < seg->end) return (segind); } panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , (uintmax_t)pa); } /* * Free a contiguous, power of two-sized set of physical pages. * * The free page queues must be locked. */ void vm_phys_free_pages(vm_page_t m, int order) { struct vm_freelist *fl; struct vm_phys_seg *seg; vm_paddr_t pa; vm_page_t m_buddy; KASSERT(m->order == VM_NFREEORDER, ("vm_phys_free_pages: page %p has unexpected order %d", m, m->order)); KASSERT(m->pool < VM_NFREEPOOL, ("vm_phys_free_pages: page %p has unexpected pool %d", m, m->pool)); KASSERT(order < VM_NFREEORDER, ("vm_phys_free_pages: order %d is out of range", order)); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); seg = &vm_phys_segs[m->segind]; if (order < VM_NFREEORDER - 1) { pa = VM_PAGE_TO_PHYS(m); do { pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); if (pa < seg->start || pa >= seg->end) break; m_buddy = &seg->first_page[atop(pa - seg->start)]; if (m_buddy->order != order) break; fl = (*seg->free_queues)[m_buddy->pool]; vm_freelist_rem(fl, m_buddy, order); if (m_buddy->pool != m->pool) vm_phys_set_pool(m->pool, m_buddy, order); order++; pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); m = &seg->first_page[atop(pa - seg->start)]; } while (order < VM_NFREEORDER - 1); } fl = (*seg->free_queues)[m->pool]; vm_freelist_add(fl, m, order, 1); } /* * Free a contiguous, arbitrarily sized set of physical pages. * * The free page queues must be locked. */ void vm_phys_free_contig(vm_page_t m, u_long npages) { u_int n; int order; /* * Avoid unnecessary coalescing by freeing the pages in the largest * possible power-of-two-sized subsets. */ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); for (;; npages -= n) { /* * Unsigned "min" is used here so that "order" is assigned * "VM_NFREEORDER - 1" when "m"'s physical address is zero * or the low-order bits of its physical address are zero * because the size of a physical address exceeds the size of * a long. */ order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, VM_NFREEORDER - 1); n = 1 << order; if (npages < n) break; vm_phys_free_pages(m, order); m += n; } /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */ for (; npages > 0; npages -= n) { order = flsl(npages) - 1; n = 1 << order; vm_phys_free_pages(m, order); m += n; } } /* * Set the pool for a contiguous, power of two-sized set of physical pages. */ void vm_phys_set_pool(int pool, vm_page_t m, int order) { vm_page_t m_tmp; for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) m_tmp->pool = pool; } /* * Search for the given physical page "m" in the free lists. If the search * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return * FALSE, indicating that "m" is not in the free lists. * * The free page queues must be locked. */ boolean_t vm_phys_unfree_page(vm_page_t m) { struct vm_freelist *fl; struct vm_phys_seg *seg; vm_paddr_t pa, pa_half; vm_page_t m_set, m_tmp; int order; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); /* * First, find the contiguous, power of two-sized set of free * physical pages containing the given physical page "m" and * assign it to "m_set". */ seg = &vm_phys_segs[m->segind]; for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && order < VM_NFREEORDER - 1; ) { order++; pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); if (pa >= seg->start) m_set = &seg->first_page[atop(pa - seg->start)]; else return (FALSE); } if (m_set->order < order) return (FALSE); if (m_set->order == VM_NFREEORDER) return (FALSE); KASSERT(m_set->order < VM_NFREEORDER, ("vm_phys_unfree_page: page %p has unexpected order %d", m_set, m_set->order)); /* * Next, remove "m_set" from the free lists. Finally, extract * "m" from "m_set" using an iterative algorithm: While "m_set" * is larger than a page, shrink "m_set" by returning the half * of "m_set" that does not contain "m" to the free lists. */ fl = (*seg->free_queues)[m_set->pool]; order = m_set->order; vm_freelist_rem(fl, m_set, order); while (order > 0) { order--; pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); if (m->phys_addr < pa_half) m_tmp = &seg->first_page[atop(pa_half - seg->start)]; else { m_tmp = m_set; m_set = &seg->first_page[atop(pa_half - seg->start)]; } vm_freelist_add(fl, m_tmp, order, 0); } KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); return (TRUE); } /* * Try to zero one physical page. Used by an idle priority thread. */ boolean_t vm_phys_zero_pages_idle(void) { static struct vm_freelist *fl; static int flind, oind, pind; vm_page_t m, m_tmp; int domain; domain = vm_rr_selectdomain(); fl = vm_phys_free_queues[domain][0][0]; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); for (;;) { TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) { for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) { if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) { vm_phys_unfree_page(m_tmp); vm_phys_freecnt_adj(m, -1); mtx_unlock(&vm_page_queue_free_mtx); pmap_zero_page_idle(m_tmp); m_tmp->flags |= PG_ZERO; mtx_lock(&vm_page_queue_free_mtx); vm_phys_freecnt_adj(m, 1); vm_phys_free_pages(m_tmp, 0); vm_page_zero_count++; cnt_prezero++; return (TRUE); } } } oind++; if (oind == VM_NFREEORDER) { oind = 0; pind++; if (pind == VM_NFREEPOOL) { pind = 0; flind++; if (flind == vm_nfreelists) flind = 0; } fl = vm_phys_free_queues[domain][flind][pind]; } } } /* * Allocate a contiguous set of physical pages of the given size * "npages" from the free lists. All of the physical pages must be at * or above the given physical address "low" and below the given * physical address "high". The given value "alignment" determines the * alignment of the first physical page in the set. If the given value * "boundary" is non-zero, then the set of physical pages cannot cross * any physical address boundary that is a multiple of that value. Both * "alignment" and "boundary" must be a power of two. */ vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary) { struct vm_freelist *fl; struct vm_phys_seg *seg; vm_paddr_t pa, pa_last, size; vm_page_t m, m_ret; u_long npages_end; int dom, domain, flind, oind, order, pind; mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); size = npages << PAGE_SHIFT; KASSERT(size != 0, ("vm_phys_alloc_contig: size must not be 0")); KASSERT((alignment & (alignment - 1)) == 0, ("vm_phys_alloc_contig: alignment must be a power of 2")); KASSERT((boundary & (boundary - 1)) == 0, ("vm_phys_alloc_contig: boundary must be a power of 2")); /* Compute the queue that is the best fit for npages. */ for (order = 0; (1 << order) < npages; order++); dom = 0; restartdom: domain = vm_rr_selectdomain(); for (flind = 0; flind < vm_nfreelists; flind++) { for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { for (pind = 0; pind < VM_NFREEPOOL; pind++) { fl = &vm_phys_free_queues[domain][flind][pind][0]; TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) { /* * A free list may contain physical pages * from one or more segments. */ seg = &vm_phys_segs[m_ret->segind]; if (seg->start > high || low >= seg->end) continue; /* * Is the size of this allocation request * larger than the largest block size? */ if (order >= VM_NFREEORDER) { /* * Determine if a sufficient number * of subsequent blocks to satisfy * the allocation request are free. */ pa = VM_PAGE_TO_PHYS(m_ret); pa_last = pa + size; for (;;) { pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); if (pa >= pa_last) break; if (pa < seg->start || pa >= seg->end) break; m = &seg->first_page[atop(pa - seg->start)]; if (m->order != VM_NFREEORDER - 1) break; } /* If not, continue to the next block. */ if (pa < pa_last) continue; } /* * Determine if the blocks are within the given range, * satisfy the given alignment, and do not cross the * given boundary. */ pa = VM_PAGE_TO_PHYS(m_ret); if (pa >= low && pa + size <= high && (pa & (alignment - 1)) == 0 && ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) goto done; } } } } if (++dom < vm_ndomains) goto restartdom; return (NULL); done: for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { fl = (*seg->free_queues)[m->pool]; vm_freelist_rem(fl, m, m->order); } if (m_ret->pool != VM_FREEPOOL_DEFAULT) vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); fl = (*seg->free_queues)[m_ret->pool]; vm_phys_split_pages(m_ret, oind, fl, order); /* Return excess pages to the free lists. */ npages_end = roundup2(npages, 1 << imin(oind, order)); if (npages < npages_end) vm_phys_free_contig(&m_ret[npages], npages_end - npages); return (m_ret); } #ifdef DDB /* * Show the number of physical pages in each of the free lists. */ DB_SHOW_COMMAND(freepages, db_show_freepages) { struct vm_freelist *fl; int flind, oind, pind, dom; for (dom = 0; dom < vm_ndomains; dom++) { db_printf("DOMAIN: %d\n", dom); for (flind = 0; flind < vm_nfreelists; flind++) { db_printf("FREE LIST %d:\n" "\n ORDER (SIZE) | NUMBER" "\n ", flind); for (pind = 0; pind < VM_NFREEPOOL; pind++) db_printf(" | POOL %d", pind); db_printf("\n-- "); for (pind = 0; pind < VM_NFREEPOOL; pind++) db_printf("-- -- "); db_printf("--\n"); for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { db_printf(" %2.2d (%6.6dK)", oind, 1 << (PAGE_SHIFT - 10 + oind)); for (pind = 0; pind < VM_NFREEPOOL; pind++) { fl = vm_phys_free_queues[dom][flind][pind]; db_printf(" | %6.6d", fl[oind].lcnt); } db_printf("\n"); } db_printf("\n"); } db_printf("\n"); } } #endif Index: head/sys/vm/vm_phys.h =================================================================== --- head/sys/vm/vm_phys.h (revision 276438) +++ head/sys/vm/vm_phys.h (revision 276439) @@ -1,122 +1,122 @@ /*- * Copyright (c) 2002-2006 Rice University * Copyright (c) 2007 Alan L. Cox * All rights reserved. * * This software was developed for the FreeBSD Project by Alan L. Cox, * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Physical memory system definitions */ #ifndef _VM_PHYS_H_ #define _VM_PHYS_H_ #ifdef _KERNEL /* Domains must be dense (non-sparse) and zero-based. */ struct mem_affinity { vm_paddr_t start; vm_paddr_t end; int domain; }; struct vm_freelist { struct pglist pl; int lcnt; }; struct vm_phys_seg { vm_paddr_t start; vm_paddr_t end; vm_page_t first_page; int domain; struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER]; }; extern struct mem_affinity *mem_affinity; extern int vm_ndomains; extern struct vm_phys_seg vm_phys_segs[]; extern int vm_phys_nsegs; /* * The following functions are only to be used by the virtual memory system. */ void vm_phys_add_page(vm_paddr_t pa); void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end); vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); -vm_page_t vm_phys_alloc_freelist_pages(int flind, int pool, int order); +vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order); vm_page_t vm_phys_alloc_pages(int pool, int order); boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high); int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, vm_memattr_t memattr); void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end); vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa); void vm_phys_free_contig(vm_page_t m, u_long npages); void vm_phys_free_pages(vm_page_t m, int order); void vm_phys_init(void); vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa); void vm_phys_set_pool(int pool, vm_page_t m, int order); boolean_t vm_phys_unfree_page(vm_page_t m); boolean_t vm_phys_zero_pages_idle(void); /* * vm_phys_domain: * * Return the memory domain the page belongs to. */ static inline struct vm_domain * vm_phys_domain(vm_page_t m) { #if MAXMEMDOM > 1 int domn, segind; /* XXXKIB try to assert that the page is managed */ segind = m->segind; KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m)); domn = vm_phys_segs[segind].domain; KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m)); return (&vm_dom[domn]); #else return (&vm_dom[0]); #endif } static inline void vm_phys_freecnt_adj(vm_page_t m, int adj) { mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); vm_cnt.v_free_count += adj; vm_phys_domain(m)->vmd_free_count += adj; } #endif /* _KERNEL */ #endif /* !_VM_PHYS_H_ */