D40772.id123834.diff (D40772: Tentative physical memory compaction)
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -5189,6 +5189,7 @@
vm/uma_core.c standard
vm/uma_dbg.c standard
vm/memguard.c optional DEBUG_MEMGUARD
+vm/vm_compact.c standard
vm/vm_domainset.c standard
vm/vm_fault.c standard
vm/vm_glue.c standard
Index: sys/vm/vm_compact.h
===================================================================
--- /dev/null
+++ sys/vm/vm_compact.h
@@ -0,0 +1,56 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023. Bojan Novković <bnovkov@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+
+MALLOC_DECLARE(M_VMCOMPACT);
+
+struct vm_compact_region {
+ vm_paddr_t start;
+ vm_paddr_t end;
+ SLIST_ENTRY(vm_compact_region) entries;
+};
+typedef struct vm_compact_region *vm_compact_region_t;
+
+SLIST_HEAD(vm_compact_region_head, vm_compact_region);
+
+typedef int (
+ *vm_compact_search_fn)(struct vm_compact_region_head *, int, void *);
+typedef size_t (
+ *vm_compact_defrag_fn)(struct vm_compact_region_head *, int, void *);
+typedef bool (*vm_compact_end_fn)(void);
+typedef void (*vm_compact_ctx_init_fn)(void **);
+
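+/*
+ * A compaction job bundles a search callback, which collects candidate
+ * regions onto the supplied list, a defrag callback, which relocates pages
+ * within those regions, and an opaque per-job context.  Illustrative usage
+ * (see vm_phys_compact_thread() in sys/vm/vm_phys.c for an in-tree caller):
+ *
+ *	ctx = vm_compact_create_job(search_fn, defrag_fn, ctx_init_fn,
+ *	    start, end, order, domain, &error);
+ *	if (ctx != NULL) {
+ *		error = vm_compact_run(ctx);
+ *		vm_compact_free_job(ctx);
+ *	}
+ */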
+void *vm_compact_create_job(vm_compact_search_fn sfn, vm_compact_defrag_fn dfn,
+ vm_compact_ctx_init_fn ctxfn, vm_paddr_t start, vm_paddr_t end, int order,
+ int domain, int *error);
+void vm_compact_free_job(void *ctx);
+int vm_compact_run(void *ctx);
Index: sys/vm/vm_compact.c
===================================================================
--- /dev/null
+++ sys/vm/vm_compact.c
@@ -0,0 +1,151 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023. Bojan Novković <bnovkov@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_compact.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pagequeue.h>
+#include <vm/vm_phys.h>
+
+#define VM_COMPACT_LOCK() mtx_lock(&compact_lock)
+#define VM_COMPACT_UNLOCK() mtx_unlock(&compact_lock)
+
+MALLOC_DEFINE(M_VMCOMPACT, "vm_compact_ctx", "memory compaction context");
+
+static struct mtx compact_lock;
+static LIST_HEAD(, vm_compact_ctx) active_compactions[MAXMEMDOM];
+
+struct vm_compact_ctx {
+ vm_compact_search_fn search_fn;
+ vm_compact_defrag_fn defrag_fn;
+
+ vm_paddr_t start;
+ vm_paddr_t end;
+
+ int order;
+ int domain;
+ struct vm_compact_region_head regions;
+
+ void *p_data;
+
+ LIST_ENTRY(vm_compact_ctx) entries;
+};
+
+static bool
+vm_compact_job_overlaps(struct vm_compact_ctx *ctxp1,
+ struct vm_compact_ctx *ctxp2)
+{
+	return (ctxp1->start <= ctxp2->end && ctxp2->start <= ctxp1->end);
+}
+
+void *
+vm_compact_create_job(vm_compact_search_fn sfn, vm_compact_defrag_fn dfn,
+ vm_compact_ctx_init_fn ctxfn, vm_paddr_t start, vm_paddr_t end, int order,
+ int domain, int *error)
+{
+ struct vm_compact_ctx *ctxp;
+ /* Arguments sanity check. */
+ if (end <= start || order > (VM_NFREEORDER_MAX - 1)) {
+ *error = (EINVAL);
+ return (NULL);
+ }
+
+ ctxp = malloc(sizeof(struct vm_compact_ctx), M_VMCOMPACT,
+ M_WAITOK | M_ZERO);
+
+	ctxp->search_fn = sfn;
+	ctxp->defrag_fn = dfn;
+	ctxp->start = start;
+	ctxp->end = end;
+	ctxp->order = order;
+	ctxp->domain = domain;
+	SLIST_INIT(&ctxp->regions);
+
+ ctxfn(&ctxp->p_data);
+
+ return ((void *)ctxp);
+}
+
+void
+vm_compact_free_job(void *ctx)
+{
+ free(ctx, M_VMCOMPACT);
+}
+
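+/*
+ * Executes a compaction job.  The job is registered on its domain's list of
+ * active compactions (failing with -EINPROGRESS if it overlaps an already
+ * running job), after which the search callback collects candidate regions
+ * and, if any were found, the defrag callback processes them.  Returns the
+ * defrag callback's return value, or 0 if the search found no candidates.
+ */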
+int
+vm_compact_run(void *ctx)
+{
+ struct vm_compact_ctx *ctxp = (struct vm_compact_ctx *)ctx;
+ struct vm_compact_ctx *ctxp_tmp;
+ int retval;
+
+ VM_COMPACT_LOCK();
+ /* Check if the requested compaction overlaps with an existing one. */
+ LIST_FOREACH (ctxp_tmp, &active_compactions[ctxp->domain], entries) {
+ if (vm_compact_job_overlaps(ctxp, ctxp_tmp)) {
+ VM_COMPACT_UNLOCK();
+ return (-EINPROGRESS);
+ }
+ }
+
+ LIST_INSERT_HEAD(&active_compactions[ctxp->domain], ctxp, entries);
+ VM_COMPACT_UNLOCK();
+
+ /* Run compaction job. */
+ if (ctxp->search_fn(&ctxp->regions, ctxp->domain, ctxp->p_data)) {
+ retval = 0;
+ goto cleanup;
+ }
+
+ retval = ctxp->defrag_fn(&ctxp->regions, ctxp->domain, ctxp->p_data);
+
+cleanup:
+ VM_COMPACT_LOCK();
+ LIST_REMOVE(ctxp, entries);
+ VM_COMPACT_UNLOCK();
+
+ return retval;
+}
+
+static void
+vm_compact_init(void *arg)
+{
+ mtx_init(&compact_lock, "vm_compact", NULL, MTX_DEF);
+ for (int i = 0; i < MAXMEMDOM; i++)
+ LIST_INIT(&active_compactions[i]);
+}
+
+SYSINIT(vm_compact, SI_SUB_KMEM + 2, SI_ORDER_ANY, vm_compact_init, NULL);
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -711,6 +711,7 @@
vm_page_bits_t vm_page_bits(int base, int size);
void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
void vm_page_free_pages_toq(struct spglist *free, bool update_wire_count);
+int vm_page_relocate_page(vm_page_t src, vm_page_t dst, int domain);
void vm_page_dirty_KBI(vm_page_t m);
void vm_page_lock_KBI(vm_page_t m, const char *file, int line);
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -738,6 +738,8 @@
#endif
phys_avail[biggestone + 1] = new_end;
+ vm_phys_search_index_startup(&vaddr);
+
/*
* Add physical memory segments corresponding to the available
* physical pages.
@@ -5595,6 +5597,86 @@
}
#endif
+/*
+ * Tries to move 'src' into 'dst'. The 'src' page must be exclusively busied
+ * and its object write-locked; the object lock is released before returning.
+ * Returns 0 on success, 1 if the error was caused by the src page, and 2 if
+ * it was caused by the dst page.
+ */
+int
+vm_page_relocate_page(vm_page_t src, vm_page_t dst, int domain)
+{
+ int error = 0;
+ struct vm_domain *vmd = VM_DOMAIN(domain);
+ vm_object_t obj = src->object;
+
+ vm_page_assert_xbusied(src);
+
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ KASSERT(vm_page_domain(src) == domain,
+ ("Source page is from a different domain"));
+ KASSERT(vm_page_domain(dst) == domain,
+ ("Destination page is from a different domain"));
+
+ vm_domain_free_lock(vmd);
+ /* Check if the dst page is still eligible and remove it from the
+ * freelist. */
+ if (dst->order != 0 || !vm_page_none_valid(dst)) {
+ error = 2;
+ vm_page_xunbusy(src);
+ vm_domain_free_unlock(vmd);
+ goto unlock;
+ }
+
+ vm_page_dequeue(dst);
+ vm_phys_unfree_page(dst);
+ vm_domain_free_unlock(vmd);
+ vm_domain_freecnt_inc(vmd, -1);
+
+ /* Unmap src page */
+ if (obj->ref_count != 0 && !vm_page_try_remove_all(src)) {
+ error = 1;
+
+ vm_page_xunbusy(src);
+ /* Place dst page back on the freelists. */
+ vm_domain_free_lock(vmd);
+ vm_phys_free_pages(dst, 0);
+ vm_domain_free_unlock(vmd);
+ vm_domain_freecnt_inc(vmd, 1);
+ goto unlock;
+ }
+	/* Note: if this step is skipped, the calling process gets stuck
+	 * sleeping on the 'vmpfw' wait channel. */
+ if (dst->busy_lock == VPB_FREED) {
+ dst->busy_lock = VPB_UNBUSIED;
+ }
+
+ /* Copy page attributes */
+ dst->a.flags = src->a.flags & ~PGA_QUEUE_STATE_MASK;
+ dst->oflags = 0;
+ pmap_copy_page(src, dst);
+
+ dst->valid = src->valid;
+ dst->dirty = src->dirty;
+ src->flags &= ~PG_ZERO;
+ vm_page_dequeue(src);
+
+ if (vm_page_replace_hold(dst, obj, src->pindex, src) &&
+ vm_page_free_prep(src)) {
+ /* Return src page to freelist. */
+ vm_domain_free_lock(vmd);
+ vm_phys_free_pages(src, 0);
+ vm_domain_free_unlock(vmd);
+
+ vm_domain_freecnt_inc(vmd, 1);
+ }
+
+ vm_page_deactivate(dst);
+unlock:
+ VM_OBJECT_WUNLOCK(obj);
+
+ return error;
+}
+
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
Index: sys/vm/vm_phys.h
===================================================================
--- sys/vm/vm_phys.h
+++ sys/vm/vm_phys.h
@@ -87,7 +87,8 @@
int vm_phys_avail_largest(void);
vm_paddr_t vm_phys_avail_size(int i);
bool vm_phys_is_dumpable(vm_paddr_t pa);
-
+int vm_phys_fragmentation_index(int order, int domain);
+void vm_phys_search_index_startup(vm_offset_t *vaddr);
static inline int
vm_phys_domain(vm_paddr_t pa)
{
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c
+++ sys/vm/vm_phys.c
@@ -47,8 +47,10 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/domainset.h>
-#include <sys/lock.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -62,6 +64,8 @@
#include <ddb/ddb.h>
#include <vm/vm.h>
+
+#include <vm/vm_compact.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
@@ -131,6 +135,53 @@
vm_paddr_t phys_avail[PHYS_AVAIL_COUNT];
vm_paddr_t dump_avail[PHYS_AVAIL_COUNT];
+/*
+ * Structures used for memory compaction.
+ */
+
+/* Tracks invalid physical memory ranges. */
+struct vm_phys_hole {
+ vm_paddr_t start;
+ vm_paddr_t end;
+ int domain;
+};
+
+/* Used to track valid memory ranges inside search index chunks containing
+ * memory holes. */
+struct vm_phys_subseg {
+ struct vm_compact_region region;
+ SLIST_ENTRY(vm_phys_subseg) link;
+};
+SLIST_HEAD(vm_phys_subseg_head, vm_phys_subseg);
+
+/* Tracks various metrics and valid memory segments for a fixed-size physical
+ * memory region: 'holecnt' counts free pages in the chunk, 'score' is bumped
+ * on order-0 frees and dropped on order-0 allocations (a rough fragmentation
+ * measure), a nonzero 'skipidx' marks a chunk with no usable memory and gives
+ * the index at which scanning should resume, and 'shp' lists the valid
+ * subsegments of chunks that partially overlap a physical memory hole. */
+struct vm_phys_search_chunk {
+ int holecnt;
+ int score;
+ int skipidx;
+ struct vm_phys_subseg_head *shp;
+};
+
+struct vm_phys_search_index {
+ struct vm_phys_search_chunk *chunks;
+ int nchunks;
+ vm_paddr_t dom_start;
+ vm_paddr_t dom_end;
+};
+
+static void vm_phys_update_search_index(vm_page_t m, int order, bool alloc);
+
+static struct vm_phys_search_index vm_phys_search_index[MAXMEMDOM];
+
+static struct vm_phys_hole vm_phys_holes[VM_PHYSSEG_MAX * 2];
+static int vm_phys_nholes;
+
+struct vm_phys_info {
+ uint64_t free_pages;
+ uint64_t free_blocks;
+};
+
/*
* Provides the mapping from VM_FREELIST_* to free list indices (flind).
*/
@@ -156,6 +207,11 @@
sysctl_vm_phys_free, "A",
"Phys Free Info");
+static int sysctl_vm_phys_frag_idx(SYSCTL_HANDLER_ARGS);
+SYSCTL_OID(_vm, OID_AUTO, phys_frag_idx,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ sysctl_vm_phys_frag_idx, "A", "Phys Frag Info");
+
static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
@@ -291,6 +347,80 @@
return (error);
}
+static void
+vm_phys_get_info(struct vm_phys_info *info, int domain)
+{
+ struct vm_freelist *fl;
+ int pind, oind, flind;
+
+ /* Calculate total number of free pages and blocks */
+ info->free_pages = info->free_blocks = 0;
+ for (flind = 0; flind < vm_nfreelists; flind++) {
+ for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
+ for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+ fl = vm_phys_free_queues[domain][flind][pind];
+ info->free_pages += fl[oind].lcnt << oind;
+ info->free_blocks += fl[oind].lcnt;
+ }
+ }
+ }
+}
+
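+/*
+ * Computes the Free Memory Fragmentation Index (FMFI) for the given order,
+ * scaled by 1000: 1000 - (free_pages * 1000) / 2^order / free_blocks.
+ * Worked example: 4000 free pages split into 4000 order-0 blocks yield
+ * 1000 - (4000 * 1000 / 4 / 4000) = 750 for order 2 (badly fragmented),
+ * while the same 4000 pages held in 1000 order-2 blocks yield 0.  The result
+ * can go negative when free blocks are on average larger than 2^order pages.
+ */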
+int
+vm_phys_fragmentation_index(int order, int domain)
+{
+ struct vm_phys_info info;
+
+ vm_domain_free_assert_locked(VM_DOMAIN(domain));
+ vm_phys_get_info(&info, domain);
+
+ if (info.free_blocks == 0) {
+ return (0);
+ }
+
+ return (1000 -
+ ((info.free_pages * 1000) / (1 << order) / info.free_blocks));
+}
+
+/*
+ * Outputs the Free Memory Fragmentation Index (FMFI) for each order in each
+ * domain.
+ */
+static int
+sysctl_vm_phys_frag_idx(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf sbuf;
+ int64_t idx;
+ int oind, dom, error;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
+
+ for (dom = 0; dom < vm_ndomains; dom++) {
+ vm_domain_free_lock(VM_DOMAIN(dom));
+
+ sbuf_printf(&sbuf, "\n--\n");
+ sbuf_printf(&sbuf, "\nDOMAIN %d\n", dom);
+ sbuf_printf(&sbuf, "\n ORDER (SIZE) | FMFI\n");
+ sbuf_printf(&sbuf, "--\n");
+
+ for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
+ idx = vm_phys_fragmentation_index(oind, dom);
+ sbuf_printf(&sbuf, " %2d (%6dK) ", oind,
+ 1 << (PAGE_SHIFT - 10 + oind));
+ sbuf_printf(&sbuf, "| %ld \n", idx);
+ }
+
+ vm_domain_free_unlock(VM_DOMAIN(dom));
+ }
+
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+ return (error);
+}
+
/*
* Outputs the set of physical memory segments.
*/
@@ -378,6 +508,7 @@
else
TAILQ_INSERT_HEAD(&fl[order].pl, m, listq);
fl[order].lcnt++;
+ vm_phys_update_search_index(m, order, false);
}
static void
@@ -387,6 +518,7 @@
TAILQ_REMOVE(&fl[order].pl, m, listq);
fl[order].lcnt--;
m->order = VM_NFREEORDER;
+ vm_phys_update_search_index(m, order, true);
}
/*
@@ -614,6 +746,47 @@
}
}
+ /*
+ * Initialize hole array.
+ */
+ struct vm_phys_hole *hp;
+
+ vm_phys_nholes = 0;
+ if (vm_phys_segs[0].start != 0) {
+ hp = &vm_phys_holes[0];
+ hp->start = 0;
+ hp->end = vm_phys_segs[0].start;
+ hp->domain = vm_phys_segs[0].domain;
+ vm_phys_nholes++;
+ }
+
+ struct vm_phys_search_index *sip;
+	/* Record holes between consecutive physical memory segments. */
+ for (int i = 0; i + 1 < vm_phys_nsegs; i++, vm_phys_nholes++) {
+ hp = &vm_phys_holes[vm_phys_nholes];
+ hp->start = vm_phys_segs[i].end;
+ hp->end = vm_phys_segs[i + 1].start;
+ hp->domain = vm_phys_segs[i].domain;
+ sip = &vm_phys_search_index[hp->domain];
+
+ /* Does this hole span two domains? */
+ if (vm_phys_segs[i].domain != vm_phys_segs[i + 1].domain &&
+ hp->end > sip->dom_end) {
+ /* Clamp end of current hole to domain end */
+ sip = &vm_phys_search_index[hp->domain];
+ hp->end = sip->dom_end;
+ /* Add new hole at beginning of subsequent domain */
+ vm_phys_nholes++;
+ hp = &vm_phys_holes[vm_phys_nholes];
+ hp->domain = vm_phys_segs[i + 1].domain;
+ sip = &vm_phys_search_index[hp->domain];
+ /* Hole starts at domain start and ends at the start of
+ * the first segment. */
+ hp->start = sip->dom_start;
+ hp->end = vm_phys_segs[i + 1].start;
+ }
+ }
+
rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}
@@ -1892,3 +2065,654 @@
}
}
#endif
+
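+/*
+ * The compaction search index divides each domain into fixed-size chunks of
+ * 2^VM_PHYS_SEARCH_CHUNK_ORDER pages (64 MB with 4 KB pages).  A chunk is
+ * considered for compaction only when its free-page ("hole") count lies
+ * between VM_PHYS_HOLECNT_LO and VM_PHYS_HOLECNT_HI, i.e. when it is neither
+ * almost fully allocated nor almost entirely free.
+ */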
+#define VM_PHYS_SEARCH_CHUNK_ORDER (14)
+#define VM_PHYS_SEARCH_CHUNK_NPAGES (1 << (VM_PHYS_SEARCH_CHUNK_ORDER))
+#define VM_PHYS_SEARCH_CHUNK_SIZE \
+ (1 << (PAGE_SHIFT + VM_PHYS_SEARCH_CHUNK_ORDER))
+#define VM_PHYS_SEARCH_CHUNK_MASK (VM_PHYS_SEARCH_CHUNK_SIZE - 1)
+#define VM_PHYS_HOLECNT_HI ((1 << (VM_PHYS_SEARCH_CHUNK_ORDER)) - 100)
+#define VM_PHYS_HOLECNT_LO (16)
+
+static __inline vm_paddr_t
+vm_phys_search_idx_to_paddr(int idx, int domain)
+{
+ vm_paddr_t paddr;
+ struct vm_phys_search_index *sip = &vm_phys_search_index[domain];
+
+ paddr = (vm_paddr_t)idx << ((VM_PHYS_SEARCH_CHUNK_ORDER) + PAGE_SHIFT);
+ /* Adjust address relative to domain start */
+ paddr += sip->dom_start & ~VM_PHYS_SEARCH_CHUNK_MASK;
+
+ return (paddr);
+}
+
+static __inline int
+vm_phys_paddr_to_chunk_idx(vm_paddr_t paddr, int domain)
+{
+ struct vm_phys_search_index *sip = &vm_phys_search_index[domain];
+
+ /* Adjust address relative to domain start */
+ paddr -= sip->dom_start & ~VM_PHYS_SEARCH_CHUNK_MASK;
+ /* Strip lower bits */
+ paddr &= ~VM_PHYS_SEARCH_CHUNK_MASK;
+ return (int)(paddr >> ((VM_PHYS_SEARCH_CHUNK_ORDER) + PAGE_SHIFT));
+}
+
+static __inline struct vm_phys_search_chunk *
+vm_phys_search_get_chunk(struct vm_phys_search_index *sip, int idx)
+{
+ KASSERT(idx >= 0 && idx < sip->nchunks,
+ ("%s: search index out-of-bounds access, idx: %d, dom_start: %p, dom_end: %p, nchunks: %d",
+ __func__, idx, (void *)sip->dom_start, (void *)sip->dom_end,
+ sip->nchunks));
+
+ return (&sip->chunks[idx]);
+}
+
+static struct vm_phys_search_chunk *
+vm_phys_paddr_to_search_chunk(vm_paddr_t paddr, int domain)
+{
+ struct vm_phys_search_index *sip = &vm_phys_search_index[domain];
+ int idx = vm_phys_paddr_to_chunk_idx(paddr, domain);
+
+ return vm_phys_search_get_chunk(sip, idx);
+}
+
+/*
+ * Allocates physical memory required for the memory compaction search index.
+ */
+void
+vm_phys_search_index_startup(vm_offset_t *vaddr)
+{
+ struct vm_phys_search_index *cur_idx;
+ vm_paddr_t pa;
+ vm_paddr_t dom_start, dom_end;
+ size_t alloc_size;
+ int dom_nsearch_chunks;
+ int i;
+
+ for (int dom = 0; dom < vm_ndomains; dom++) {
+ cur_idx = &vm_phys_search_index[dom];
+ dom_nsearch_chunks = 0;
+		/* Calculate the number of search index chunks for the
+		 * current domain. */
+ if (mem_affinity != NULL) {
+ for (i = 0; mem_affinity[i].end != 0; i++) {
+ if (mem_affinity[i].domain == dom) {
+ dom_start = mem_affinity[i].start;
+ while (mem_affinity[i].domain == dom) {
+ i++;
+ }
+ dom_end = mem_affinity[i - 1].end;
+ }
+ }
+ } else {
+ dom_start = phys_avail[0];
+ i = 1;
+ while (phys_avail[i + 1] != 0) {
+ i++;
+ }
+ dom_end = phys_avail[i];
+ }
+ /* Allocate search index for current domain */
+ dom_nsearch_chunks = atop(dom_end - dom_start) /
+ VM_PHYS_SEARCH_CHUNK_NPAGES;
+		/* Add additional chunks if the domain start and end aren't
+		 * search chunk-aligned. */
+ if (dom_start & VM_PHYS_SEARCH_CHUNK_MASK)
+ dom_nsearch_chunks++;
+ if (dom_end & VM_PHYS_SEARCH_CHUNK_MASK)
+ dom_nsearch_chunks++;
+
+ alloc_size = round_page(
+ dom_nsearch_chunks * sizeof(struct vm_phys_search_chunk));
+ pa = vm_phys_early_alloc(dom, alloc_size);
+
+ /* Map and zero the array */
+ cur_idx->chunks = (void *)(uintptr_t)pmap_map(vaddr, pa,
+ pa + alloc_size, VM_PROT_READ | VM_PROT_WRITE);
+ cur_idx->nchunks = dom_nsearch_chunks;
+ cur_idx->dom_start = dom_start;
+ cur_idx->dom_end = dom_end;
+
+ if (cur_idx->chunks == NULL) {
+ panic("Unable to allocate search index for domain %d\n",
+ dom);
+ }
+
+ bzero(cur_idx->chunks, alloc_size);
+ }
+}
+
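+/*
+ * Called from the buddy free list add/remove paths to keep the search index
+ * in sync: adjusts the owning chunk's free page count by 2^order and its
+ * fragmentation score on order-0 operations.
+ */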
+static void
+vm_phys_update_search_index(vm_page_t m, int order, bool alloc)
+{
+ int domain = vm_page_domain(m);
+ struct vm_phys_search_chunk *scp =
+ vm_phys_paddr_to_search_chunk(m->phys_addr, domain);
+ int pgcnt = 1 << order;
+
+ /* Update chunk hole count */
+ scp->holecnt += alloc ? -pgcnt : pgcnt;
+ KASSERT(scp->holecnt >= 0 &&
+ scp->holecnt <= VM_PHYS_SEARCH_CHUNK_NPAGES,
+ ("%s: inconsistent hole count: %d", __func__, scp->holecnt));
+
+ /* Update chunk fragmentation score */
+ if (order == 0) {
+ scp->score += alloc ? -1 : 1;
+ if (scp->score < 0) {
+ scp->score = 0;
+ }
+ }
+}
+
+static void
+vm_phys_chunk_register_hole(struct vm_phys_search_chunk *cp,
+ vm_paddr_t hole_start, vm_paddr_t hole_end)
+{
+ struct vm_phys_subseg *ssp;
+
+ if (cp->shp == NULL) {
+ vm_paddr_t chunk_start = hole_start &
+ ~VM_PHYS_SEARCH_CHUNK_MASK;
+ cp->shp = malloc(sizeof(*cp->shp), M_TEMP, M_ZERO | M_WAITOK);
+ SLIST_INIT(cp->shp);
+ /* Split chunk into a subseg */
+ ssp = malloc(sizeof(*ssp), M_TEMP, M_ZERO | M_WAITOK);
+ ssp->region.start = chunk_start;
+ ssp->region.end = chunk_start + VM_PHYS_SEARCH_CHUNK_SIZE;
+
+ SLIST_INSERT_HEAD(cp->shp, ssp, link);
+ }
+
+ /*
+ * Holes are ordered by paddr - hole registration will
+ * thus always affect the last subsegment in the list.
+ * Take last subseg and split it.
+ */
+ ssp = SLIST_FIRST(cp->shp);
+ while (SLIST_NEXT(ssp, link)) {
+ ssp = SLIST_NEXT(ssp, link);
+ }
+
+ if (hole_start == ssp->region.start) {
+ ssp->region.start = hole_end;
+ } else if (hole_end == ssp->region.end) {
+ ssp->region.end = hole_start;
+ } else { /* Hole splits the subseg - create and enqueue new subseg */
+ struct vm_phys_subseg *nssp = malloc(sizeof(*nssp), M_TEMP,
+ M_ZERO | M_WAITOK);
+
+ nssp->region.start = hole_end;
+ nssp->region.end = ssp->region.end;
+ ssp->region.end = hole_start;
+ KASSERT(nssp->region.end > nssp->region.start,
+ ("%s: inconsistent subsegment after splitting", __func__));
+
+ SLIST_INSERT_AFTER(ssp, nssp, link);
+ }
+
+ KASSERT(ssp->region.end > ssp->region.start,
+ ("%s: inconsistent subsegment", __func__));
+}
+
+/*
+ * Populates compaction search index with hole information.
+ */
+static void
+vm_phys_compact_init_holes(void)
+{
+ int dom;
+ struct vm_phys_search_index *sip;
+ struct vm_phys_search_chunk *start_chunk, *end_chunk;
+ struct vm_phys_hole *hp;
+ int start_idx, end_idx;
+
+ for (dom = 0; dom < vm_ndomains; dom++) {
+ sip = &vm_phys_search_index[dom];
+
+ /* Add hole information to domain search chunks */
+ for (int i = 0; i < vm_phys_nholes; i++) {
+ hp = &vm_phys_holes[i];
+ if (hp->domain != dom)
+ continue;
+
+ start_idx = vm_phys_paddr_to_chunk_idx(hp->start, dom);
+ end_idx = vm_phys_paddr_to_chunk_idx(hp->end, dom);
+
+ start_chunk = vm_phys_search_get_chunk(sip, start_idx);
+ /*
+ * If the domain end address is search chunk-aligned
+ * and a hole ends there, decrement the index to avoid
+ * an out of bounds access to the search index chunks.
+ */
+ if ((sip->dom_end & VM_PHYS_SEARCH_CHUNK_MASK) == 0 &&
+ hp->end == sip->dom_end) {
+ end_chunk = vm_phys_search_get_chunk(sip,
+ end_idx - 1);
+ /* This is the last search chunk, point it to
+ * the first one */
+ end_chunk->skipidx = 1;
+ } else {
+ end_chunk = vm_phys_search_get_chunk(sip,
+ end_idx);
+ }
+
+ /* Hole is completely inside this chunk */
+ if (start_chunk == end_chunk) {
+ /* Register hole in current chunk. */
+ vm_phys_chunk_register_hole(start_chunk,
+ hp->start, hp->end);
+ } else { /* Hole spans multiple chunks */
+ if (hp->start & VM_PHYS_SEARCH_CHUNK_MASK) {
+ /* Partial overlap - register hole in
+ * first chunk. */
+ vm_phys_chunk_register_hole(start_chunk,
+ hp->start,
+ (hp->start &
+ ~VM_PHYS_SEARCH_CHUNK_MASK) +
+ VM_PHYS_SEARCH_CHUNK_SIZE);
+ start_chunk++;
+ }
+ /* Mark all chunks that are completely covered
+ * by this hole as invalid. */
+ while (start_chunk < end_chunk) {
+ start_chunk->skipidx = end_idx;
+ start_chunk++;
+ }
+
+ if (hp->end & VM_PHYS_SEARCH_CHUNK_MASK) {
+ /* Partial overlap - register hole in
+ * last chunk. */
+ vm_phys_chunk_register_hole(end_chunk,
+ (hp->end &
+ ~VM_PHYS_SEARCH_CHUNK_MASK),
+ hp->end);
+ }
+ }
+ }
+ /* Register search index holes at domain end */
+ if (sip->dom_end & VM_PHYS_SEARCH_CHUNK_MASK) {
+ end_idx = vm_phys_paddr_to_chunk_idx(sip->dom_end, dom);
+ end_chunk = vm_phys_paddr_to_search_chunk(sip->dom_end,
+ dom);
+
+ vm_phys_chunk_register_hole(end_chunk, sip->dom_end,
+ vm_phys_search_idx_to_paddr(end_idx + 1, dom));
+ }
+ }
+}
+
+/* Initializes holes. */
+static void
+vm_phys_init_compact(void *arg)
+{
+ vm_phys_compact_init_holes();
+}
+
+SYSINIT(vm_phys_compact, SI_SUB_KMEM + 1, SI_ORDER_ANY, vm_phys_init_compact,
+ NULL);
+
+/* Maximum number of memory regions enqueued during a search function run. */
+#define VM_PHYS_COMPACT_MAX_SEARCH_REGIONS 10
+
+struct vm_phys_compact_ctx {
+ int last_idx;
+ struct vm_compact_region region[VM_PHYS_COMPACT_MAX_SEARCH_REGIONS];
+};
+
+static void
+vm_phys_compact_ctx_init(void **p_data)
+{
+ *p_data = (void *)malloc(sizeof(struct vm_phys_compact_ctx),
+ M_VMCOMPACT, M_ZERO | M_WAITOK);
+}
+
+static struct vm_compact_region *
+vm_phys_compact_ctx_get_region(struct vm_phys_compact_ctx *ctxp, int idx)
+{
+ KASSERT(idx < VM_PHYS_COMPACT_MAX_SEARCH_REGIONS,
+ ("%s: Not enough memory for regions: %d\n", __func__, idx));
+ return (&ctxp->region[idx]);
+}
+
+/*
+ * Scans the search index for physical memory regions that could be potential
+ * compaction candidates. Eligible regions are enqueued on a slist.
+ */
+static int
+vm_phys_compact_search(struct vm_compact_region_head *headp, int domain,
+ void *p_data)
+{
+ struct vm_phys_search_chunk *scp;
+ struct vm_phys_compact_ctx *ctx = (struct vm_phys_compact_ctx *)p_data;
+ struct vm_phys_search_index *sip = &vm_phys_search_index[domain];
+ struct vm_phys_subseg *ssegp;
+ struct vm_compact_region *rp;
+ vm_paddr_t start, end;
+ int idx, region_cnt = 0;
+ int ctx_region_idx = 0;
+ int chunks_scanned = 0;
+
+ SLIST_INIT(headp);
+
+ idx = ctx->last_idx;
+ while (chunks_scanned < sip->nchunks &&
+ region_cnt < VM_PHYS_COMPACT_MAX_SEARCH_REGIONS) {
+ for (;
+ chunks_scanned < sip->nchunks && idx < sip->nchunks - 1 &&
+ region_cnt < VM_PHYS_COMPACT_MAX_SEARCH_REGIONS;
+ chunks_scanned++, idx++) {
+
+ scp = vm_phys_search_get_chunk(sip, idx);
+ /* Skip current chunk if it was marked as invalid */
+ if (scp->skipidx) {
+				if (scp->skipidx > idx + 1)
+					chunks_scanned += scp->skipidx - 1 -
+					    idx;
+				idx = scp->skipidx - 1;
+ continue;
+ }
+
+ /* Determine whether the current chunk is eligible to be
+ * compacted */
+ if (scp->score > 1 &&
+ scp->holecnt >= VM_PHYS_HOLECNT_LO &&
+ scp->holecnt <= VM_PHYS_HOLECNT_HI) {
+ if (scp->shp) {
+ /* Enqueue subsegments in chunks with
+ * holes. */
+ SLIST_FOREACH (ssegp, scp->shp, link) {
+ SLIST_INSERT_HEAD(headp,
+ &ssegp->region, entries);
+ }
+
+ } else {
+ start = vm_phys_search_idx_to_paddr(idx,
+ domain);
+ end = vm_phys_search_idx_to_paddr(idx +
+ 1,
+ domain);
+
+ rp = vm_phys_compact_ctx_get_region(ctx,
+ ctx_region_idx);
+ rp->start = start;
+ rp->end = end;
+ SLIST_INSERT_HEAD(headp, rp, entries);
+
+ ctx_region_idx++;
+ }
+
+ region_cnt++;
+ }
+ }
+ idx = (idx + 1) % (sip->nchunks - 1);
+ }
+ ctx->last_idx = (idx + 1) % (sip->nchunks - 1);
+
+ return SLIST_EMPTY(headp);
+}
+
+/*
+ * Determine whether a given page is eligible as a relocation destination.
+ */
+static __noinline bool
+vm_phys_defrag_page_free(vm_page_t p)
+{
+ return (p->order == 0);
+}
+
+/*
+ * Determine whether a given page is eligible to be relocated.
+ * A suitable page is left in a xbusied state and its object is locked.
+ */
+static __noinline bool
+vm_phys_defrag_page_relocatable(vm_page_t p)
+{
+ vm_object_t obj;
+
+ if (p->order != VM_NFREEORDER || vm_page_wired(p) ||
+ (obj = atomic_load_ptr(&p->object)) == NULL)
+ return false;
+
+ VM_OBJECT_WLOCK(obj);
+ if (obj != p->object ||
+ (obj->type != OBJT_DEFAULT && obj->type != OBJT_VNODE)) {
+ goto unlock;
+ }
+
+ if (vm_page_tryxbusy(p) == 0)
+ goto unlock;
+
+ if (!vm_page_wired(p) && !vm_page_none_valid(p)) {
+ return true;
+ }
+
+ vm_page_xunbusy(p);
+unlock:
+ VM_OBJECT_WUNLOCK(obj);
+ return false;
+}
+
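+/*
+ * Two-finger compaction of each candidate region: 'free' walks up from the
+ * start of the region looking for free order-0 pages, 'scan' walks down from
+ * the end looking for relocatable pages, and each relocatable page found is
+ * moved into the lowest free page.  Returns the number of pages relocated.
+ */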
+static size_t
+vm_phys_defrag(struct vm_compact_region_head *headp, int domain, void *p_data)
+{
+ vm_compact_region_t rp;
+ size_t nrelocated = 0;
+ int error;
+ while (!SLIST_EMPTY(headp)) {
+ rp = SLIST_FIRST(headp);
+ SLIST_REMOVE_HEAD(headp, entries);
+
+ vm_page_t free = PHYS_TO_VM_PAGE(rp->start);
+ vm_page_t scan = PHYS_TO_VM_PAGE(rp->end - PAGE_SIZE);
+
+ KASSERT(free && scan,
+ ("%s: pages are null %p, %p, region start: %p, region end: %p",
+ __func__, free, scan, (void *)rp->start,
+ (void *)rp->end));
+ KASSERT(free->phys_addr && scan->phys_addr,
+ ("%s: pages have null paddr %p, %p", __func__,
+ (void *)free->phys_addr, (void *)scan->phys_addr));
+
+ while (free < scan) {
+
+ /* Find suitable destination page ("hole"). */
+ while (free < scan && !vm_phys_defrag_page_free(free)) {
+ free++;
+ }
+
+ if (__predict_false(free >= scan)) {
+ break;
+ }
+
+ /* Find suitable relocation candidate. */
+ while (free < scan &&
+ !vm_phys_defrag_page_relocatable(scan)) {
+ scan--;
+ }
+
+ if (__predict_false(free >= scan)) {
+ break;
+ }
+
+ /* Swap the two pages and move "fingers". */
+ error = vm_page_relocate_page(scan, free, domain);
+ if (error == 0) {
+ nrelocated++;
+ scan--;
+ free++;
+ } else if (error == 1) {
+ scan--;
+ } else {
+ free++;
+ }
+ }
+ }
+
+ return nrelocated;
+}
+
+/*
+ * Value of FMFI metric below which compaction will not start.
+ */
+static int vm_phys_compact_thresh = 300; /* 200 - 1000 */
+static int sysctl_vm_phys_compact_thresh(SYSCTL_HANDLER_ARGS);
+SYSCTL_OID(_vm, OID_AUTO, phys_compact_thresh, CTLTYPE_INT | CTLFLAG_RW, NULL,
+ 0, sysctl_vm_phys_compact_thresh, "I",
+ "Fragmentation index threshold for memory compaction");
+
+static int
+sysctl_vm_phys_compact_thresh(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int new = vm_phys_compact_thresh;
+
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ if (new != vm_phys_compact_thresh) {
+ if (new < 200) {
+ new = 200;
+ } else if (new > 1000) {
+ new = 1000;
+ }
+ vm_phys_compact_thresh = new;
+ }
+
+ return (0);
+}
+
+/*
+ * Structures and routines used by the compaction daemon.
+ */
+static struct proc *compactproc;
+static struct thread *compact_threads[MAXMEMDOM];
+
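+/*
+ * Per-domain compaction thread.  Each thread sleeps on its slot in
+ * 'compact_threads' (woken by a periodic timeout or the vm.phys_compact
+ * sysctl) and, while the domain's fragmentation index for VM_LEVEL_0_ORDER
+ * exceeds vm_phys_compact_thresh, reruns its compaction job until the index
+ * stops improving.
+ */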
+static void
+vm_phys_compact_thread(void *arg)
+{
+ void *cctx;
+ size_t domain = (size_t)arg;
+ void *chan = (void *)&compact_threads[domain];
+ struct vm_domain *dom = VM_DOMAIN(domain);
+
+ int error;
+ int old_frag_idx, frag_idx, nretries = 0;
+ int nrelocated;
+ int timo = hz;
+
+ vm_paddr_t start, end;
+
+ start = vm_phys_search_index[domain].dom_start;
+ end = vm_phys_search_index[domain].dom_end;
+ cctx = vm_compact_create_job(vm_phys_compact_search, vm_phys_defrag,
+ vm_phys_compact_ctx_init, start, end, VM_LEVEL_0_ORDER, domain,
+ &error);
+ KASSERT(cctx != NULL, ("Error creating compaction job: %d\n", error));
+
+ while (true) {
+ tsleep(chan, PPAUSE | PCATCH | PNOLOCK, "cmpctslp", timo);
+ kproc_suspend_check(compactproc);
+
+ vm_domain_free_lock(dom);
+ frag_idx = vm_phys_fragmentation_index(VM_LEVEL_0_ORDER,
+ domain);
+ vm_domain_free_unlock(dom);
+
+ nretries = 0;
+
+ /* Run compaction until the fragmentation metric stops
+ * improving. */
+ do {
+ /* No need to compact if fragmentation is below the
+ * threshold. */
+ if (frag_idx < vm_phys_compact_thresh) {
+ break;
+ }
+
+ old_frag_idx = frag_idx;
+
+ nrelocated = vm_compact_run(cctx);
+			/* An error occurred. */
+ if (nrelocated < 0) {
+ break;
+ }
+
+ vm_domain_free_lock(dom);
+ frag_idx = vm_phys_fragmentation_index(VM_LEVEL_0_ORDER,
+ domain);
+ vm_domain_free_unlock(dom);
+
+ if (nrelocated == 0 || (frag_idx >= old_frag_idx)) {
+ nretries++;
+ } else {
+ nretries = 0;
+ }
+ } while (nretries < 5);
+
+ /* If compaction was not able to lower the fragmentation score,
+ * sleep for a longer period of time. */
+ if (nretries == 5) {
+ timo = 10 * hz;
+ } else {
+ timo = hz;
+ }
+ }
+ vm_compact_free_job(cctx);
+}
+
+static void
+vm_phys_compact_daemon(void)
+{
+ int error;
+
+ EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, compactproc,
+ SHUTDOWN_PRI_FIRST);
+
+ for (size_t i = 1; i < vm_ndomains; i++) {
+ error = kproc_kthread_add(vm_phys_compact_thread, (void *)i,
+ &compactproc, &compact_threads[i - 1], 0, 0,
+ "compactdaemon", "compact%zu", i);
+ if (error) {
+ panic("%s: cannot start compaction thread, error: %d",
+ __func__, error);
+ }
+ }
+
+ vm_phys_compact_thread((void *)0);
+}
+
+static struct kproc_desc compact_kp = { "compactdaemon", vm_phys_compact_daemon,
+ &compactproc };
+SYSINIT(compactdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, kproc_start,
+ &compact_kp);
+
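+/*
+ * Reading vm.phys_compact wakes up the compaction thread of every domain.
+ */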
+static int sysctl_vm_phys_compact(SYSCTL_HANDLER_ARGS);
+SYSCTL_OID(_vm, OID_AUTO, phys_compact, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
+ sysctl_vm_phys_compact, "A", "Compact physical memory");
+
+static int
+sysctl_vm_phys_compact(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf sbuf;
+ int error;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 32, req);
+
+ for (int i = 0; i < vm_ndomains; i++) {
+ void *chan = (void *)&compact_threads[i];
+ wakeup_one(chan);
+ }
+
+ sbuf_printf(&sbuf, "Kicked compaction daemon");
+
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+
+ return (error);
+}