Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -188,6 +188,64 @@
 #define pmap_l1_pindex(v) (NUL2E + ((v) >> L1_SHIFT))
 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
 
+#ifdef NUMA
+struct pmap_large_md_page {
+        struct rwlock pv_lock;
+        struct md_page pv_page;
+        /* Pad to a power of 2 */
+        int pv_pad[2];
+};
+
+static struct pmap_large_md_page *
+_pa_to_pmdp(vm_paddr_t pa)
+{
+        struct vm_phys_seg *seg;
+        int segind;
+
+        for (segind = 0; segind < vm_phys_nsegs; segind++) {
+                seg = &vm_phys_segs[segind];
+                if (pa >= seg->start && pa < seg->end)
+                        return ((struct pmap_large_md_page *)seg->md_first +
+                            pmap_l2_pindex(pa) - pmap_l2_pindex(seg->start));
+        }
+        return (NULL);
+}
+
+static struct pmap_large_md_page *
+pa_to_pmdp(vm_paddr_t pa)
+{
+        struct pmap_large_md_page *pvd;
+
+        pvd = _pa_to_pmdp(pa);
+        if (pvd == NULL)
+                panic("pa 0x%jx not within vm_phys_segs", (uintmax_t)pa);
+        return (pvd);
+}
+
+static struct pmap_large_md_page *
+page_to_pmdp(vm_page_t m)
+{
+        struct vm_phys_seg *seg;
+
+        seg = &vm_phys_segs[m->segind];
+        return ((struct pmap_large_md_page *)seg->md_first +
+            pmap_l2_pindex(VM_PAGE_TO_PHYS(m)) - pmap_l2_pindex(seg->start));
+}
+
+#define pa_to_pvh(pa)   (&(pa_to_pmdp(pa)->pv_page))
+#define page_to_pvh(m)  (&(page_to_pmdp(m)->pv_page))
+
+#define PHYS_TO_PV_LIST_LOCK(pa)        ({              \
+        struct pmap_large_md_page *_pvd;                \
+        struct rwlock *_lock;                           \
+        _pvd = _pa_to_pmdp(pa);                         \
+        if (__predict_false(_pvd == NULL))              \
+                _lock = &pv_dummy_large.pv_lock;        \
+        else                                            \
+                _lock = &(_pvd->pv_lock);               \
+        _lock;                                          \
+})
+#else
 static struct md_page *
 pa_to_pvh(vm_paddr_t pa)
 {
@@ -217,6 +275,7 @@
 
 #define PHYS_TO_PV_LIST_LOCK(pa)        \
         (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+#endif
 
 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)  do {    \
         struct rwlock **_lockp = (lockp);               \
@@ -304,9 +363,16 @@
 
 struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
+#ifdef NUMA
+__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
+#define pv_dummy pv_dummy_large.pv_page
+__read_mostly static struct pmap_large_md_page *pv_table;
+__read_mostly vm_paddr_t pmap_last_pa;
+#else
 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
+#endif
 
 vm_paddr_t dmap_phys_base;      /* The start of the dmap region */
 vm_paddr_t dmap_phys_max;       /* The limit of the dmap region */
@@ -1284,59 +1350,111 @@
         mtx_init(&set->asid_set_mutex, "asid set", NULL, MTX_SPIN);
 }
 
-/*
- * Initialize the pmap module.
- * Called by vm_init, to initialize any structures that the pmap
- * system needs to map virtual memory.
- */
-void
-pmap_init(void)
+#ifdef NUMA
+static void
+pmap_init_pv_table(void)
 {
         struct vm_phys_seg *seg, *next_seg;
-        struct md_page *pvh;
+        struct pmap_large_md_page *pvd;
         vm_size_t s;
-        uint64_t mmfr1;
-        int i, pv_npg, vmid_bits;
+        long start, end, highest, pv_npg;
+        int domain, i, j, pages;
 
         /*
-         * Are large page mappings enabled?
+         * We strongly depend on the size being a power of two, so the assert
+         * is overzealous. However, should the struct be resized to a
+         * different power of two, the code below needs to be revisited.
          */
-        TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
-        if (superpages_enabled) {
-                KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
-                    ("pmap_init: can't assign to pagesizes[1]"));
-                pagesizes[1] = L2_SIZE;
-                if (L1_BLOCKS_SUPPORTED) {
-                        KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
-                            ("pmap_init: can't assign to pagesizes[2]"));
-                        pagesizes[2] = L1_SIZE;
-                }
+        CTASSERT((sizeof(*pvd) == 64));
+
+        /*
+         * Calculate the size of the array.
+         */
+        pv_npg = 0;
+        for (i = 0; i < vm_phys_nsegs; i++) {
+                seg = &vm_phys_segs[i];
+                pv_npg += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                    pmap_l2_pindex(seg->start);
         }
+        s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page);
+        s = round_page(s);
+        pv_table = (struct pmap_large_md_page *)kva_alloc(s);
+        if (pv_table == NULL)
+                panic("%s: kva_alloc failed\n", __func__);
 
         /*
-         * Initialize the ASID allocator.
+         * Iterate physical segments to allocate space for respective pages.
          */
-        pmap_init_asids(&asids,
-            (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
+        highest = -1;
+        s = 0;
+        for (i = 0; i < vm_phys_nsegs; i++) {
+                seg = &vm_phys_segs[i];
+                start = highest + 1;
+                end = start + pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                    pmap_l2_pindex(seg->start);
+                domain = seg->domain;
 
-        if (has_hyp()) {
-                mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
-                vmid_bits = 8;
+                if (highest >= end)
+                        continue;
 
-                if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
-                    ID_AA64MMFR1_VMIDBits_16)
-                        vmid_bits = 16;
-                pmap_init_asids(&vmids, vmid_bits);
+                pvd = &pv_table[start];
+
+                pages = end - start + 1;
+                s = round_page(pages * sizeof(*pvd));
+                highest = start + (s / sizeof(*pvd)) - 1;
+
+                for (j = 0; j < s; j += PAGE_SIZE) {
+                        vm_page_t m = vm_page_alloc_noobj_domain(domain, 0);
+                        if (m == NULL)
+                                panic("failed to allocate PV table page");
+                        pmap_zero_page(m);
+                        pmap_qenter((vm_offset_t)pvd + j, &m, 1);
+                }
+
+                for (j = 0; j < s / sizeof(*pvd); j++) {
+                        rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
+                        TAILQ_INIT(&pvd->pv_page.pv_list);
+                        pvd++;
+                }
         }
+        pvd = &pv_dummy_large;
+        memset(pvd, 0, sizeof(*pvd));
+        rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
+        TAILQ_INIT(&pvd->pv_page.pv_list);
 
         /*
-         * Initialize pv chunk lists.
+         * Set pointers from vm_phys_segs to pv_table.
          */
-        for (i = 0; i < PMAP_MEMDOM; i++) {
-                mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
-                    MTX_DEF);
-                TAILQ_INIT(&pv_chunks[i].pvc_list);
+        for (i = 0, pvd = pv_table; i < vm_phys_nsegs; i++) {
+                seg = &vm_phys_segs[i];
+                seg->md_first = pvd;
+                pvd += pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) -
+                    pmap_l2_pindex(seg->start);
+
+                /*
+                 * If there is a following segment, and the final
+                 * superpage of this segment and the initial superpage
+                 * of the next segment are the same then adjust the
+                 * pv_table entry for that next segment down by one so
+                 * that the pv_table entries will be shared.
+                 */
+                if (i + 1 < vm_phys_nsegs) {
+                        next_seg = &vm_phys_segs[i + 1];
+                        if (pmap_l2_pindex(roundup2(seg->end, L2_SIZE)) - 1 ==
+                            pmap_l2_pindex(next_seg->start)) {
+                                pvd--;
+                        }
+                }
         }
+}
+#else
+static void
+pmap_init_pv_table(void)
+{
+        struct vm_phys_seg *seg, *next_seg;
+        struct md_page *pvh;
+        vm_size_t s;
+        long i, pv_npg;
 
         /*
          * Initialize the pool of pv list locks.
@@ -1357,7 +1475,7 @@
         /*
          * Allocate memory for the pv head table for superpages.
          */
-        s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+        s = (vm_size_t)pv_npg * sizeof(struct md_page);
         s = round_page(s);
         pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
         for (i = 0; i < pv_npg; i++)
@@ -1388,6 +1506,61 @@
                         }
                 }
         }
+}
+#endif
+
+/*
+ * Initialize the pmap module.
+ * Called by vm_init, to initialize any structures that the pmap
+ * system needs to map virtual memory.
+ */
+void
+pmap_init(void)
+{
+        uint64_t mmfr1;
+        int i, vmid_bits;
+
+        /*
+         * Are large page mappings enabled?
+         */
+        TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
+        if (superpages_enabled) {
+                KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
+                    ("pmap_init: can't assign to pagesizes[1]"));
+                pagesizes[1] = L2_SIZE;
+                if (L1_BLOCKS_SUPPORTED) {
+                        KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
+                            ("pmap_init: can't assign to pagesizes[2]"));
+                        pagesizes[2] = L1_SIZE;
+                }
+        }
+
+        /*
+         * Initialize the ASID allocator.
+         */
+        pmap_init_asids(&asids,
+            (READ_SPECIALREG(tcr_el1) & TCR_ASID_16) != 0 ? 16 : 8);
+
+        if (has_hyp()) {
+                mmfr1 = READ_SPECIALREG(id_aa64mmfr1_el1);
+                vmid_bits = 8;
+
+                if (ID_AA64MMFR1_VMIDBits_VAL(mmfr1) ==
+                    ID_AA64MMFR1_VMIDBits_16)
+                        vmid_bits = 16;
+                pmap_init_asids(&vmids, vmid_bits);
+        }
+
+        /*
+         * Initialize pv chunk lists.
+         */
+        for (i = 0; i < PMAP_MEMDOM; i++) {
+                mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL,
+                    MTX_DEF);
+                TAILQ_INIT(&pv_chunks[i].pvc_list);
+        }
+        pmap_init_pv_table();
+
         vm_initialized = 1;
 }
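
Reviewer note (not part of the patch): the NUMA path above replaces the hashed pv_list_locks[] array with one lock and one pv list head per 2 MB (L2) superpage, allocated from that superpage's own NUMA domain and found by address arithmetic rather than hashing. The sketch below is a standalone userspace model of that lookup, assuming 2 MB superpages; seg_model, md_base, l2_pindex() and pmdp_index() are illustrative names that merely mirror vm_phys_seg, md_first, pmap_l2_pindex() and _pa_to_pmdp() from the patch, not kernel interfaces.

/*
 * Standalone model of the per-superpage pv_table indexing used by
 * _pa_to_pmdp(): one entry per 2 MB (L2) superpage, located at
 * md_base + l2_pindex(pa) - l2_pindex(seg->start).
 * Review aid only; names are illustrative, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define L2_SHIFT        21                      /* 2 MB superpages */
#define L2_SIZE         (1UL << L2_SHIFT)

/* Equivalent of pmap_l2_pindex(): superpage index of a physical address. */
static uint64_t
l2_pindex(uint64_t pa)
{
        return (pa >> L2_SHIFT);
}

/* Hypothetical stand-in for a vm_phys_seg and its md_first base index. */
struct seg_model {
        uint64_t start;         /* seg->start */
        uint64_t end;           /* seg->end */
        uint64_t md_base;       /* index of seg->md_first within pv_table */
};

/*
 * Mirror of _pa_to_pmdp(): return the pv_table index for pa, or -1 if the
 * address falls outside every segment (the kernel falls back to
 * pv_dummy_large in that case).
 */
static int64_t
pmdp_index(const struct seg_model *segs, int nsegs, uint64_t pa)
{
        int i;

        for (i = 0; i < nsegs; i++) {
                if (pa >= segs[i].start && pa < segs[i].end)
                        return (segs[i].md_base +
                            l2_pindex(pa) - l2_pindex(segs[i].start));
        }
        return (-1);
}

int
main(void)
{
        /* Two made-up segments; the first spans eight superpages. */
        struct seg_model segs[2] = {
                { .start = 0x80000000, .end = 0x80000000 + 8 * L2_SIZE,
                  .md_base = 0 },
                { .start = 0x100000000, .end = 0x100000000 + 4 * L2_SIZE,
                  .md_base = 8 },
        };

        /* Addresses within the same 2 MB superpage share one entry/lock. */
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x80000000));
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x80000000 + L2_SIZE - 1));
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x100000000 + L2_SIZE));
        printf("%lld\n", (long long)pmdp_index(segs, 2, 0x200000000)); /* -1 */
        return (0);
}

Built with a stock C compiler, this prints 0, 0, 9 and -1: the first two addresses fall in the same superpage and therefore share one pv_table entry (and lock), while an address outside every segment has no entry, the case the kernel handles by falling back to pv_dummy_large.pv_lock in PHYS_TO_PV_LIST_LOCK().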