Index: head/sys/arm/arm/pmap-v6.c =================================================================== --- head/sys/arm/arm/pmap-v6.c (revision 254917) +++ head/sys/arm/arm/pmap-v6.c (revision 254918) @@ -1,4249 +1,5231 @@ /* From: $NetBSD: pmap.c,v 1.148 2004/04/03 04:35:48 bsh Exp $ */ /*- * Copyright 2011 Semihalf * Copyright 2004 Olivier Houchard. * Copyright 2003 Wasabi Systems, Inc. * All rights reserved. * * Written by Steve C. Woodford for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * From: FreeBSD: src/sys/arm/arm/pmap.c,v 1.113 2009/07/24 13:50:29 */ /*- * Copyright (c) 2002-2003 Wasabi Systems, Inc. * Copyright (c) 2001 Richard Earnshaw * Copyright (c) 2001-2002 Christopher Gilbert * All rights reserved. * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Charles M. Hannum. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * * RiscBSD kernel project * * pmap.c * * Machine dependant vm stuff * * Created : 20/09/94 */ /* * Special compilation symbols * PMAP_DEBUG - Build in pmap_debug_level code */ /* Include header files */ #include "opt_vm.h" #include "opt_pmap.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #ifdef DEBUG extern int last_fault_code; #endif #ifdef PMAP_DEBUG #define PDEBUG(_lev_,_stat_) \ if (pmap_debug_level >= (_lev_)) \ ((_stat_)) #define dprintf printf int pmap_debug_level = 0; #define PMAP_INLINE #else /* PMAP_DEBUG */ #define PDEBUG(_lev_,_stat_) /* Nothing */ #define dprintf(x, arg...) #define PMAP_INLINE __inline #endif /* PMAP_DEBUG */ #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif +#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) + #ifdef ARM_L2_PIPT #define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((pa), (size)) #define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((pa), (size)) #else #define pmap_l2cache_wbinv_range(va, pa, size) cpu_l2cache_wbinv_range((va), (size)) #define pmap_l2cache_inv_range(va, pa, size) cpu_l2cache_inv_range((va), (size)) #endif extern struct pv_addr systempage; /* * Internal function prototypes */ +static PMAP_INLINE +struct pv_entry *pmap_find_pv(struct md_page *, pmap_t, vm_offset_t); static void pmap_free_pv_chunk(struct pv_chunk *pc); static void pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t pmap_get_pv_entry(pmap_t pmap, boolean_t try); static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); +static boolean_t pmap_pv_insert_section(pmap_t, vm_offset_t, + vm_paddr_t); +static struct pv_entry *pmap_remove_pv(struct vm_page *, pmap_t, vm_offset_t); +static int pmap_pvh_wired_mappings(struct md_page *, int); static void pmap_enter_locked(pmap_t, vm_offset_t, vm_prot_t, vm_page_t, vm_prot_t, boolean_t, int); static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va); static void pmap_alloc_l1(pmap_t); static void pmap_free_l1(pmap_t); +static void pmap_map_section(pmap_t, vm_offset_t, vm_offset_t, + vm_prot_t, boolean_t); +static void pmap_promote_section(pmap_t, vm_offset_t); +static boolean_t pmap_demote_section(pmap_t, vm_offset_t); +static boolean_t pmap_enter_section(pmap_t, vm_offset_t, vm_page_t, + vm_prot_t); +static void pmap_remove_section(pmap_t, vm_offset_t); + static int pmap_clearbit(struct vm_page *, u_int); static struct l2_bucket *pmap_get_l2_bucket(pmap_t, vm_offset_t); static struct l2_bucket *pmap_alloc_l2_bucket(pmap_t, vm_offset_t); static void pmap_free_l2_bucket(pmap_t, struct l2_bucket *, u_int); static vm_offset_t kernel_pt_lookup(vm_paddr_t); static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t pmap_curmaxkvaddr; vm_paddr_t kernel_l1pa; vm_offset_t kernel_vm_end = 0; vm_offset_t vm_max_kernel_address; struct pmap kernel_pmap_store; static pt_entry_t *csrc_pte, *cdst_pte; static vm_offset_t csrcp, cdstp; static struct mtx cmtx; static void pmap_init_l1(struct l1_ttable *, pd_entry_t *); /* * These routines are called when the CPU type is identified to set up * the PTE prototypes, cache modes, etc. * * The variables are always here, just in case LKMs need to reference * them (though, they shouldn't). */ static void pmap_set_prot(pt_entry_t *pte, vm_prot_t prot, uint8_t user); pt_entry_t pte_l1_s_cache_mode; pt_entry_t pte_l1_s_cache_mode_pt; pt_entry_t pte_l2_l_cache_mode; pt_entry_t pte_l2_l_cache_mode_pt; pt_entry_t pte_l2_s_cache_mode; pt_entry_t pte_l2_s_cache_mode_pt; struct msgbuf *msgbufp = 0; /* * Crashdump maps. */ static caddr_t crashdumpmap; extern void bcopy_page(vm_offset_t, vm_offset_t); extern void bzero_page(vm_offset_t); char *_tmppt; /* * Metadata for L1 translation tables. */ struct l1_ttable { /* Entry on the L1 Table list */ SLIST_ENTRY(l1_ttable) l1_link; /* Entry on the L1 Least Recently Used list */ TAILQ_ENTRY(l1_ttable) l1_lru; /* Track how many domains are allocated from this L1 */ volatile u_int l1_domain_use_count; /* * A free-list of domain numbers for this L1. * We avoid using ffs() and a bitmap to track domains since ffs() * is slow on ARM. */ u_int8_t l1_domain_first; u_int8_t l1_domain_free[PMAP_DOMAINS]; /* Physical address of this L1 page table */ vm_paddr_t l1_physaddr; /* KVA of this L1 page table */ pd_entry_t *l1_kva; }; /* * Convert a virtual address into its L1 table index. That is, the * index used to locate the L2 descriptor table pointer in an L1 table. * This is basically used to index l1->l1_kva[]. * * Each L2 descriptor table represents 1MB of VA space. */ #define L1_IDX(va) (((vm_offset_t)(va)) >> L1_S_SHIFT) /* * L1 Page Tables are tracked using a Least Recently Used list. * - New L1s are allocated from the HEAD. * - Freed L1s are added to the TAIl. * - Recently accessed L1s (where an 'access' is some change to one of * the userland pmaps which owns this L1) are moved to the TAIL. */ static TAILQ_HEAD(, l1_ttable) l1_lru_list; /* * A list of all L1 tables */ static SLIST_HEAD(, l1_ttable) l1_list; static struct mtx l1_lru_lock; /* * The l2_dtable tracks L2_BUCKET_SIZE worth of L1 slots. * * This is normally 16MB worth L2 page descriptors for any given pmap. * Reference counts are maintained for L2 descriptors so they can be * freed when empty. */ struct l2_dtable { /* The number of L2 page descriptors allocated to this l2_dtable */ u_int l2_occupancy; /* List of L2 page descriptors */ struct l2_bucket { pt_entry_t *l2b_kva; /* KVA of L2 Descriptor Table */ vm_paddr_t l2b_phys; /* Physical address of same */ u_short l2b_l1idx; /* This L2 table's L1 index */ u_short l2b_occupancy; /* How many active descriptors */ } l2_bucket[L2_BUCKET_SIZE]; }; /* pmap_kenter_internal flags */ #define KENTER_CACHE 0x1 #define KENTER_USER 0x2 /* * Given an L1 table index, calculate the corresponding l2_dtable index * and bucket index within the l2_dtable. */ #define L2_IDX(l1idx) (((l1idx) >> L2_BUCKET_LOG2) & \ (L2_SIZE - 1)) #define L2_BUCKET(l1idx) ((l1idx) & (L2_BUCKET_SIZE - 1)) /* * Given a virtual address, this macro returns the * virtual address required to drop into the next L2 bucket. */ #define L2_NEXT_BUCKET(va) (((va) & L1_S_FRAME) + L1_S_SIZE) /* * We try to map the page tables write-through, if possible. However, not * all CPUs have a write-through cache mode, so on those we have to sync * the cache when we frob page tables. * * We try to evaluate this at compile time, if possible. However, it's * not always possible to do that, hence this run-time var. */ int pmap_needs_pte_sync; /* * Macro to determine if a mapping might be resident in the * instruction cache and/or TLB */ #define PTE_BEEN_EXECD(pte) (L2_S_EXECUTABLE(pte) && L2_S_REFERENCED(pte)) /* * Macro to determine if a mapping might be resident in the * data cache and/or TLB */ #define PTE_BEEN_REFD(pte) (L2_S_REFERENCED(pte)) #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 #endif #define pmap_is_current(pm) ((pm) == pmap_kernel() || \ curproc->p_vmspace->vm_map.pmap == (pm)) /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static int pv_entry_count, pv_entry_max, pv_entry_high_water; +static struct md_page *pv_table; static int shpgperproc = PMAP_SHPGPERPROC; struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ int pv_maxchunks; /* How many chunks we have KVA for */ vm_offset_t pv_vafree; /* Freelist stored in the PTE */ static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 8); CTASSERT(_NPCPV == 252); #define PC_FREE0_6 0xfffffffful /* Free values for index 0 through 6 */ #define PC_FREE7 0x0ffffffful /* Free values for index 7 */ static const uint32_t pc_freemask[_NPCM] = { PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, PC_FREE0_6, PC_FREE7 }; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); +/* Superpages utilization enabled = 1 / disabled = 0 */ +static int sp_enabled = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, sp_enabled, CTLFLAG_RDTUN, &sp_enabled, 0, + "Are large page mappings enabled?"); + SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif uma_zone_t l2zone; static uma_zone_t l2table_zone; static vm_offset_t pmap_kernel_l2dtable_kva; static vm_offset_t pmap_kernel_l2ptp_kva; static vm_paddr_t pmap_kernel_l2ptp_phys; static struct rwlock pvh_global_lock; int l1_mem_types[] = { ARM_L1S_STRONG_ORD, ARM_L1S_DEVICE_NOSHARE, ARM_L1S_DEVICE_SHARE, ARM_L1S_NRML_NOCACHE, ARM_L1S_NRML_IWT_OWT, ARM_L1S_NRML_IWB_OWB, ARM_L1S_NRML_IWBA_OWBA }; int l2l_mem_types[] = { ARM_L2L_STRONG_ORD, ARM_L2L_DEVICE_NOSHARE, ARM_L2L_DEVICE_SHARE, ARM_L2L_NRML_NOCACHE, ARM_L2L_NRML_IWT_OWT, ARM_L2L_NRML_IWB_OWB, ARM_L2L_NRML_IWBA_OWBA }; int l2s_mem_types[] = { ARM_L2S_STRONG_ORD, ARM_L2S_DEVICE_NOSHARE, ARM_L2S_DEVICE_SHARE, ARM_L2S_NRML_NOCACHE, ARM_L2S_NRML_IWT_OWT, ARM_L2S_NRML_IWB_OWB, ARM_L2S_NRML_IWBA_OWBA }; /* * This list exists for the benefit of pmap_map_chunk(). It keeps track * of the kernel L2 tables during bootstrap, so that pmap_map_chunk() can * find them as necessary. * * Note that the data on this list MUST remain valid after initarm() returns, * as pmap_bootstrap() uses it to contruct L2 table metadata. */ SLIST_HEAD(, pv_addr) kernel_pt_list = SLIST_HEAD_INITIALIZER(kernel_pt_list); static void pmap_init_l1(struct l1_ttable *l1, pd_entry_t *l1pt) { int i; l1->l1_kva = l1pt; l1->l1_domain_use_count = 0; l1->l1_domain_first = 0; for (i = 0; i < PMAP_DOMAINS; i++) l1->l1_domain_free[i] = i + 1; /* * Copy the kernel's L1 entries to each new L1. */ if (l1pt != pmap_kernel()->pm_l1->l1_kva) memcpy(l1pt, pmap_kernel()->pm_l1->l1_kva, L1_TABLE_SIZE); if ((l1->l1_physaddr = pmap_extract(pmap_kernel(), (vm_offset_t)l1pt)) == 0) panic("pmap_init_l1: can't get PA of L1 at %p", l1pt); SLIST_INSERT_HEAD(&l1_list, l1, l1_link); TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); } static vm_offset_t kernel_pt_lookup(vm_paddr_t pa) { struct pv_addr *pv; SLIST_FOREACH(pv, &kernel_pt_list, pv_list) { if (pv->pv_pa == pa) return (pv->pv_va); } return (0); } void pmap_pte_init_mmu_v6(void) { if (PTE_PAGETABLE >= 3) pmap_needs_pte_sync = 1; pte_l1_s_cache_mode = l1_mem_types[PTE_CACHE]; pte_l2_l_cache_mode = l2l_mem_types[PTE_CACHE]; pte_l2_s_cache_mode = l2s_mem_types[PTE_CACHE]; pte_l1_s_cache_mode_pt = l1_mem_types[PTE_PAGETABLE]; pte_l2_l_cache_mode_pt = l2l_mem_types[PTE_PAGETABLE]; pte_l2_s_cache_mode_pt = l2s_mem_types[PTE_PAGETABLE]; } /* * Allocate an L1 translation table for the specified pmap. * This is called at pmap creation time. */ static void pmap_alloc_l1(pmap_t pmap) { struct l1_ttable *l1; u_int8_t domain; /* * Remove the L1 at the head of the LRU list */ mtx_lock(&l1_lru_lock); l1 = TAILQ_FIRST(&l1_lru_list); TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); /* * Pick the first available domain number, and update * the link to the next number. */ domain = l1->l1_domain_first; l1->l1_domain_first = l1->l1_domain_free[domain]; /* * If there are still free domain numbers in this L1, * put it back on the TAIL of the LRU list. */ if (++l1->l1_domain_use_count < PMAP_DOMAINS) TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); mtx_unlock(&l1_lru_lock); /* * Fix up the relevant bits in the pmap structure */ pmap->pm_l1 = l1; pmap->pm_domain = domain + 1; } /* * Free an L1 translation table. * This is called at pmap destruction time. */ static void pmap_free_l1(pmap_t pmap) { struct l1_ttable *l1 = pmap->pm_l1; mtx_lock(&l1_lru_lock); /* * If this L1 is currently on the LRU list, remove it. */ if (l1->l1_domain_use_count < PMAP_DOMAINS) TAILQ_REMOVE(&l1_lru_list, l1, l1_lru); /* * Free up the domain number which was allocated to the pmap */ l1->l1_domain_free[pmap->pm_domain - 1] = l1->l1_domain_first; l1->l1_domain_first = pmap->pm_domain - 1; l1->l1_domain_use_count--; /* * The L1 now must have at least 1 free domain, so add * it back to the LRU list. If the use count is zero, * put it at the head of the list, otherwise it goes * to the tail. */ if (l1->l1_domain_use_count == 0) { TAILQ_INSERT_HEAD(&l1_lru_list, l1, l1_lru); } else TAILQ_INSERT_TAIL(&l1_lru_list, l1, l1_lru); mtx_unlock(&l1_lru_lock); } /* * Returns a pointer to the L2 bucket associated with the specified pmap * and VA, or NULL if no L2 bucket exists for the address. */ static PMAP_INLINE struct l2_bucket * pmap_get_l2_bucket(pmap_t pmap, vm_offset_t va) { struct l2_dtable *l2; struct l2_bucket *l2b; u_short l1idx; l1idx = L1_IDX(va); if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL || (l2b = &l2->l2_bucket[L2_BUCKET(l1idx)])->l2b_kva == NULL) return (NULL); return (l2b); } /* * Returns a pointer to the L2 bucket associated with the specified pmap * and VA. * * If no L2 bucket exists, perform the necessary allocations to put an L2 * bucket/page table in place. * * Note that if a new L2 bucket/page was allocated, the caller *must* * increment the bucket occupancy counter appropriately *before* * releasing the pmap's lock to ensure no other thread or cpu deallocates * the bucket/page in the meantime. */ static struct l2_bucket * pmap_alloc_l2_bucket(pmap_t pmap, vm_offset_t va) { struct l2_dtable *l2; struct l2_bucket *l2b; u_short l1idx; l1idx = L1_IDX(va); PMAP_ASSERT_LOCKED(pmap); rw_assert(&pvh_global_lock, RA_WLOCKED); if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { /* * No mapping at this address, as there is * no entry in the L1 table. * Need to allocate a new l2_dtable. */ PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); return (NULL); } rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); if (pmap->pm_l2[L2_IDX(l1idx)] != NULL) { /* * Someone already allocated the l2_dtable while * we were doing the same. */ uma_zfree(l2table_zone, l2); l2 = pmap->pm_l2[L2_IDX(l1idx)]; } else { bzero(l2, sizeof(*l2)); /* * Link it into the parent pmap */ pmap->pm_l2[L2_IDX(l1idx)] = l2; } } l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; /* * Fetch pointer to the L2 page table associated with the address. */ if (l2b->l2b_kva == NULL) { pt_entry_t *ptep; /* * No L2 page table has been allocated. Chances are, this * is because we just allocated the l2_dtable, above. */ PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); ptep = uma_zalloc(l2zone, M_NOWAIT); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); if (l2b->l2b_kva != 0) { /* We lost the race. */ uma_zfree(l2zone, ptep); return (l2b); } l2b->l2b_phys = vtophys(ptep); if (ptep == NULL) { /* * Oops, no more L2 page tables available at this * time. We may need to deallocate the l2_dtable * if we allocated a new one above. */ if (l2->l2_occupancy == 0) { pmap->pm_l2[L2_IDX(l1idx)] = NULL; uma_zfree(l2table_zone, l2); } return (NULL); } l2->l2_occupancy++; l2b->l2b_kva = ptep; l2b->l2b_l1idx = l1idx; } return (l2b); } static PMAP_INLINE void pmap_free_l2_ptp(pt_entry_t *l2) { uma_zfree(l2zone, l2); } /* * One or more mappings in the specified L2 descriptor table have just been * invalidated. * * Garbage collect the metadata and descriptor table itself if necessary. * * The pmap lock must be acquired when this is called (not necessary * for the kernel pmap). */ static void pmap_free_l2_bucket(pmap_t pmap, struct l2_bucket *l2b, u_int count) { struct l2_dtable *l2; pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep; u_short l1idx; /* * Update the bucket's reference count according to how many * PTEs the caller has just invalidated. */ l2b->l2b_occupancy -= count; /* * Note: * * Level 2 page tables allocated to the kernel pmap are never freed * as that would require checking all Level 1 page tables and * removing any references to the Level 2 page table. See also the * comment elsewhere about never freeing bootstrap L2 descriptors. * * We make do with just invalidating the mapping in the L2 table. * * This isn't really a big deal in practice and, in fact, leads * to a performance win over time as we don't need to continually * alloc/free. */ if (l2b->l2b_occupancy > 0 || pmap == pmap_kernel()) return; /* * There are no more valid mappings in this level 2 page table. * Go ahead and NULL-out the pointer in the bucket, then * free the page table. */ l1idx = l2b->l2b_l1idx; ptep = l2b->l2b_kva; l2b->l2b_kva = NULL; pl1pd = &pmap->pm_l1->l1_kva[l1idx]; /* * If the L1 slot matches the pmap's domain * number, then invalidate it. */ l1pd = *pl1pd & (L1_TYPE_MASK | L1_C_DOM_MASK); if (l1pd == (L1_C_DOM(pmap->pm_domain) | L1_TYPE_C)) { *pl1pd = 0; PTE_SYNC(pl1pd); } /* * Release the L2 descriptor table back to the pool cache. */ pmap_free_l2_ptp(ptep); /* * Update the reference count in the associated l2_dtable */ l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (--l2->l2_occupancy > 0) return; /* * There are no more valid mappings in any of the Level 1 * slots managed by this l2_dtable. Go ahead and NULL-out * the pointer in the parent pmap and free the l2_dtable. */ pmap->pm_l2[L2_IDX(l1idx)] = NULL; uma_zfree(l2table_zone, l2); } /* * Pool cache constructors for L2 descriptor tables, metadata and pmap * structures. */ static int pmap_l2ptp_ctor(void *mem, int size, void *arg, int flags) { struct l2_bucket *l2b; pt_entry_t *ptep, pte; vm_offset_t va = (vm_offset_t)mem & ~PAGE_MASK; /* * The mappings for these page tables were initially made using * pmap_kenter() by the pool subsystem. Therefore, the cache- * mode will not be right for page table mappings. To avoid * polluting the pmap_kenter() code with a special case for * page tables, we simply fix up the cache-mode here if it's not * correct. */ l2b = pmap_get_l2_bucket(pmap_kernel(), va); ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; cpu_idcache_wbinv_range(va, PAGE_SIZE); pmap_l2cache_wbinv_range(va, pte & L2_S_FRAME, PAGE_SIZE); if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { /* * Page tables must have the cache-mode set to * Write-Thru. */ *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; PTE_SYNC(ptep); cpu_tlb_flushD_SE(va); cpu_cpwait(); } memset(mem, 0, L2_TABLE_SIZE_REAL); return (0); } /* * Modify pte bits for all ptes corresponding to the given physical address. * We use `maskbits' rather than `clearbits' because we're always passing * constants and the latter would require an extra inversion at run-time. */ static int pmap_clearbit(struct vm_page *m, u_int maskbits) { struct l2_bucket *l2b; - struct pv_entry *pv; + struct pv_entry *pv, *pve, *next_pv; + struct md_page *pvh; + pd_entry_t *pl1pd; pt_entry_t *ptep, npte, opte; pmap_t pmap; vm_offset_t va; u_int oflags; int count = 0; rw_wlock(&pvh_global_lock); + if ((m->flags & PG_FICTITIOUS) != 0) + goto small_mappings; + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { + va = pv->pv_va; + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, + ("pmap_clearbit: valid section mapping expected")); + if ((maskbits & PVF_WRITE) && (pv->pv_flags & PVF_WRITE)) + (void)pmap_demote_section(pmap, va); + else if ((maskbits & PVF_REF) && L1_S_REFERENCED(*pl1pd)) { + if (pmap_demote_section(pmap, va)) { + if ((pv->pv_flags & PVF_WIRED) == 0) { + /* + * Remove the mapping to a single page + * so that a subsequent access may + * repromote. Since the underlying + * l2_bucket is fully populated, this + * removal never frees an entire + * l2_bucket. + */ + va += (VM_PAGE_TO_PHYS(m) & + L1_S_OFFSET); + l2b = pmap_get_l2_bucket(pmap, va); + KASSERT(l2b != NULL, + ("pmap_clearbit: no l2 bucket for " + "va 0x%#x, pmap 0x%p", va, pmap)); + ptep = &l2b->l2b_kva[l2pte_index(va)]; + *ptep = 0; + PTE_SYNC(ptep); + pmap_free_l2_bucket(pmap, l2b, 1); + pve = pmap_remove_pv(m, pmap, va); + KASSERT(pve != NULL, ("pmap_clearbit: " + "no PV entry for managed mapping")); + pmap_free_pv_entry(pmap, pve); + + } + } + } else if ((maskbits & PVF_MOD) && L1_S_WRITABLE(*pl1pd)) { + if (pmap_demote_section(pmap, va)) { + if ((pv->pv_flags & PVF_WIRED) == 0) { + /* + * Write protect the mapping to a + * single page so that a subsequent + * write access may repromote. + */ + va += (VM_PAGE_TO_PHYS(m) & + L1_S_OFFSET); + l2b = pmap_get_l2_bucket(pmap, va); + KASSERT(l2b != NULL, + ("pmap_clearbit: no l2 bucket for " + "va 0x%#x, pmap 0x%p", va, pmap)); + ptep = &l2b->l2b_kva[l2pte_index(va)]; + if ((*ptep & L2_S_PROTO) != 0) { + pve = pmap_find_pv(&m->md, + pmap, va); + KASSERT(pve != NULL, + ("pmap_clearbit: no PV " + "entry for managed mapping")); + pve->pv_flags &= ~PVF_WRITE; + *ptep &= ~L2_APX; + PTE_SYNC(ptep); + } + } + } + } + PMAP_UNLOCK(pmap); + } + +small_mappings: if (TAILQ_EMPTY(&m->md.pv_list)) { rw_wunlock(&pvh_global_lock); return (0); } /* * Loop over all current mappings setting/clearing as appropos */ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { va = pv->pv_va; pmap = PV_PMAP(pv); oflags = pv->pv_flags; pv->pv_flags &= ~maskbits; PMAP_LOCK(pmap); l2b = pmap_get_l2_bucket(pmap, va); + KASSERT(l2b != NULL, ("pmap_clearbit: no l2 bucket for " + "va 0x%#x, pmap 0x%p", va, pmap)); ptep = &l2b->l2b_kva[l2pte_index(va)]; npte = opte = *ptep; if (maskbits & (PVF_WRITE | PVF_MOD)) { /* make the pte read only */ npte |= L2_APX; } if (maskbits & PVF_REF) { /* * Clear referenced flag in PTE so that we * will take a flag fault the next time the mapping * is referenced. */ npte &= ~L2_S_REF; } CTR4(KTR_PMAP,"clearbit: pmap:%p bits:%x pte:%x->%x", pmap, maskbits, opte, npte); if (npte != opte) { count++; *ptep = npte; PTE_SYNC(ptep); /* Flush the TLB entry if a current pmap. */ if (PTE_BEEN_EXECD(opte)) cpu_tlb_flushID_SE(pv->pv_va); else if (PTE_BEEN_REFD(opte)) cpu_tlb_flushD_SE(pv->pv_va); } PMAP_UNLOCK(pmap); } if (maskbits & PVF_WRITE) vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); return (count); } /* * main pv_entry manipulation functions: * pmap_enter_pv: enter a mapping onto a vm_page list * pmap_remove_pv: remove a mappiing from a vm_page list * * NOTE: pmap_enter_pv expects to lock the pvh itself * pmap_remove_pv expects the caller to lock the pvh before calling */ /* * pmap_enter_pv: enter a mapping onto a vm_page's PV list * * => caller should hold the proper lock on pvh_global_lock * => caller should have pmap locked * => we will (someday) gain the lock on the vm_page's PV list * => caller should adjust ptp's wire_count before calling * => caller should not adjust pmap's wire_count */ static void pmap_enter_pv(struct vm_page *m, struct pv_entry *pve, pmap_t pmap, vm_offset_t va, u_int flags) { rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_ASSERT_LOCKED(pmap); pve->pv_va = va; pve->pv_flags = flags; TAILQ_INSERT_HEAD(&m->md.pv_list, pve, pv_list); if (pve->pv_flags & PVF_WIRED) ++pmap->pm_stats.wired_count; } /* * * pmap_find_pv: Find a pv entry * * => caller should hold lock on vm_page */ static PMAP_INLINE struct pv_entry * -pmap_find_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va) +pmap_find_pv(struct md_page *md, pmap_t pmap, vm_offset_t va) { struct pv_entry *pv; rw_assert(&pvh_global_lock, RA_WLOCKED); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) - if (pmap == PV_PMAP(pv) && va == pv->pv_va) - break; + TAILQ_FOREACH(pv, &md->pv_list, pv_list) + if (pmap == PV_PMAP(pv) && va == pv->pv_va) + break; + return (pv); } /* * vector_page_setprot: * * Manipulate the protection of the vector page. */ void vector_page_setprot(int prot) { struct l2_bucket *l2b; pt_entry_t *ptep; l2b = pmap_get_l2_bucket(pmap_kernel(), vector_page); ptep = &l2b->l2b_kva[l2pte_index(vector_page)]; /* * Set referenced flag. * Vectors' page is always desired * to be allowed to reside in TLB. */ *ptep |= L2_S_REF; pmap_set_prot(ptep, prot|VM_PROT_EXECUTE, 0); cpu_tlb_flushD_SE(vector_page); cpu_cpwait(); } static void pmap_set_prot(pt_entry_t *ptep, vm_prot_t prot, uint8_t user) { *ptep &= ~(L2_S_PROT_MASK | L2_XN); if (!(prot & VM_PROT_EXECUTE)) *ptep |= L2_XN; /* Set defaults first - kernel read access */ *ptep |= L2_APX; *ptep |= L2_S_PROT_R; /* Now tune APs as desired */ if (user) *ptep |= L2_S_PROT_U; if (prot & VM_PROT_WRITE) *ptep &= ~(L2_APX); } /* * pmap_remove_pv: try to remove a mapping from a pv_list * * => caller should hold proper lock on pmap_main_lock * => pmap should be locked * => caller should hold lock on vm_page [so that attrs can be adjusted] * => caller should adjust ptp's wire_count and free PTP if needed * => caller should NOT adjust pmap's wire_count * => we return the removed pve */ static struct pv_entry * pmap_remove_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va) { struct pv_entry *pve; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_ASSERT_LOCKED(pmap); - pve = pmap_find_pv(m, pmap, va); /* find corresponding pve */ + pve = pmap_find_pv(&m->md, pmap, va); /* find corresponding pve */ if (pve != NULL) { TAILQ_REMOVE(&m->md.pv_list, pve, pv_list); if (pve->pv_flags & PVF_WIRED) --pmap->pm_stats.wired_count; } if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); return(pve); /* return removed pve */ } /* * * pmap_modify_pv: Update pv flags * * => caller should hold lock on vm_page [so that attrs can be adjusted] * => caller should NOT adjust pmap's wire_count * => we return the old flags * * Modify a physical-virtual mapping in the pv table */ static u_int pmap_modify_pv(struct vm_page *m, pmap_t pmap, vm_offset_t va, u_int clr_mask, u_int set_mask) { struct pv_entry *npv; u_int flags, oflags; PMAP_ASSERT_LOCKED(pmap); rw_assert(&pvh_global_lock, RA_WLOCKED); - if ((npv = pmap_find_pv(m, pmap, va)) == NULL) + if ((npv = pmap_find_pv(&m->md, pmap, va)) == NULL) return (0); /* * There is at least one VA mapping this page. */ oflags = npv->pv_flags; npv->pv_flags = flags = (oflags & ~clr_mask) | set_mask; if ((flags ^ oflags) & PVF_WIRED) { if (flags & PVF_WIRED) ++pmap->pm_stats.wired_count; else --pmap->pm_stats.wired_count; } return (oflags); } /* Function to set the debug level of the pmap code */ #ifdef PMAP_DEBUG void pmap_debug(int level) { pmap_debug_level = level; dprintf("pmap_debug: level=%d\n", pmap_debug_level); } #endif /* PMAP_DEBUG */ void pmap_pinit0(struct pmap *pmap) { PDEBUG(1, printf("pmap_pinit0: pmap = %08x\n", (u_int32_t) pmap)); bcopy(kernel_pmap, pmap, sizeof(*pmap)); bzero(&pmap->pm_mtx, sizeof(pmap->pm_mtx)); PMAP_LOCK_INIT(pmap); TAILQ_INIT(&pmap->pm_pvchunk); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_DEFAULT; } static vm_offset_t pmap_ptelist_alloc(vm_offset_t *head) { pt_entry_t *pte; vm_offset_t va; va = *head; if (va == 0) return (va); /* Out of memory */ pte = vtopte(va); *head = *pte; if ((*head & L2_TYPE_MASK) != L2_TYPE_INV) panic("%s: va is not L2_TYPE_INV!", __func__); *pte = 0; return (va); } static void pmap_ptelist_free(vm_offset_t *head, vm_offset_t va) { pt_entry_t *pte; if ((va & L2_TYPE_MASK) != L2_TYPE_INV) panic("%s: freeing va that is not L2_TYPE INV!", __func__); pte = vtopte(va); *pte = *head; /* virtual! L2_TYPE is L2_TYPE_INV though */ *head = va; } static void pmap_ptelist_init(vm_offset_t *head, void *base, int npages) { int i; vm_offset_t va; *head = 0; for (i = npages - 1; i >= 0; i--) { va = (vm_offset_t)base + i * PAGE_SIZE; pmap_ptelist_free(head, va); } } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { + vm_size_t s; + int i, pv_npg; PDEBUG(1, printf("pmap_init: phys_start = %08x\n", PHYSADDR)); l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); /* + * Are large page mappings supported and enabled? + */ + TUNABLE_INT_FETCH("vm.pmap.sp_enabled", &sp_enabled); + if (sp_enabled) { + KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, + ("pmap_init: can't assign to pagesizes[1]")); + pagesizes[1] = NBPDR; + } + + /* + * Calculate the size of the pv head table for superpages. + */ + for (i = 0; phys_avail[i + 1]; i += 2); + pv_npg = round_1mpage(phys_avail[(i - 2) + 1]) / NBPDR; + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)(pv_npg * sizeof(struct md_page)); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, + M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + + /* * Initialize the address space for the pv chunks. */ TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_max = roundup(pv_entry_max, _NPCPV); pv_entry_high_water = 9 * (pv_entry_max / 10); pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks); if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); /* * Now it is safe to enable pv_table recording. */ PDEBUG(1, printf("pmap_init: done!\n")); } SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, "Max number of PV entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, "Page share factor per proc"); +static SYSCTL_NODE(_vm_pmap, OID_AUTO, section, CTLFLAG_RD, 0, + "1MB page mapping counters"); + +static u_long pmap_section_demotions; +SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_section_demotions, 0, "1MB page demotions"); + +static u_long pmap_section_mappings; +SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_section_mappings, 0, "1MB page mappings"); + +static u_long pmap_section_p_failures; +SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_section_p_failures, 0, "1MB page promotion failures"); + +static u_long pmap_section_promotions; +SYSCTL_ULONG(_vm_pmap_section, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_section_promotions, 0, "1MB page promotions"); + int pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype, int user) { struct l2_dtable *l2; struct l2_bucket *l2b; pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa; u_int l1idx; int rv = 0; l1idx = L1_IDX(va); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); - /* + * Check and possibly fix-up L1 section mapping + * only when superpage mappings are enabled to speed up. + */ + if (sp_enabled) { + pl1pd = &pmap->pm_l1->l1_kva[l1idx]; + l1pd = *pl1pd; + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + /* Catch an access to the vectors section */ + if (l1idx == L1_IDX(vector_page)) + goto out; + /* + * Stay away from the kernel mappings. + * None of them should fault from L1 entry. + */ + if (pmap == pmap_kernel()) + goto out; + /* + * Catch a forbidden userland access + */ + if (user && !(l1pd & L1_S_PROT_U)) + goto out; + /* + * Superpage is always either mapped read only + * or it is modified and permitted to be written + * by default. Therefore, process only reference + * flag fault and demote page in case of write fault. + */ + if ((ftype & VM_PROT_WRITE) && !L1_S_WRITABLE(l1pd) && + L1_S_REFERENCED(l1pd)) { + (void)pmap_demote_section(pmap, va); + goto out; + } else if (!L1_S_REFERENCED(l1pd)) { + /* Mark the page "referenced" */ + *pl1pd = l1pd | L1_S_REF; + PTE_SYNC(pl1pd); + goto l1_section_out; + } else + goto out; + } + } + /* * If there is no l2_dtable for this address, then the process * has no business accessing it. * * Note: This will catch userland processes trying to access * kernel addresses. */ l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL) goto out; /* * Likewise if there is no L2 descriptor table */ l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; if (l2b->l2b_kva == NULL) goto out; /* * Check the PTE itself. */ ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; if (pte == 0) goto out; /* * Catch a userland access to the vector page mapped at 0x0 */ if (user && !(pte & L2_S_PROT_U)) goto out; if (va == vector_page) goto out; pa = l2pte_pa(pte); CTR5(KTR_PMAP, "pmap_fault_fix: pmap:%p va:%x pte:0x%x ftype:%x user:%x", pmap, va, pte, ftype, user); if ((ftype & VM_PROT_WRITE) && !(L2_S_WRITABLE(pte)) && L2_S_REFERENCED(pte)) { /* * This looks like a good candidate for "page modified" * emulation... */ struct pv_entry *pv; struct vm_page *m; /* Extract the physical address of the page */ if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) { goto out; } /* Get the current flags for this page. */ - pv = pmap_find_pv(m, pmap, va); + pv = pmap_find_pv(&m->md, pmap, va); if (pv == NULL) { goto out; } /* * Do the flags say this page is writable? If not then it * is a genuine write fault. If yes then the write fault is * our fault as we did not reflect the write access in the * PTE. Now we know a write has occurred we can correct this * and also set the modified bit */ if ((pv->pv_flags & PVF_WRITE) == 0) { goto out; } vm_page_dirty(m); /* Re-enable write permissions for the page */ pmap_set_prot(ptep, VM_PROT_WRITE, *ptep & L2_S_PROT_U); CTR1(KTR_PMAP, "pmap_fault_fix: new pte:0x%x", pte); PTE_SYNC(ptep); rv = 1; } else if (!L2_S_REFERENCED(pte)) { /* * This looks like a good candidate for "page referenced" * emulation. */ struct pv_entry *pv; struct vm_page *m; /* Extract the physical address of the page */ if ((m = PHYS_TO_VM_PAGE(pa)) == NULL) goto out; /* Get the current flags for this page. */ - pv = pmap_find_pv(m, pmap, va); + pv = pmap_find_pv(&m->md, pmap, va); if (pv == NULL) goto out; vm_page_aflag_set(m, PGA_REFERENCED); /* Mark the page "referenced" */ *ptep = pte | L2_S_REF; PTE_SYNC(ptep); rv = 1; } /* * We know there is a valid mapping here, so simply * fix up the L1 if necessary. */ pl1pd = &pmap->pm_l1->l1_kva[l1idx]; l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; if (*pl1pd != l1pd) { *pl1pd = l1pd; PTE_SYNC(pl1pd); rv = 1; } #ifdef DEBUG /* * If 'rv == 0' at this point, it generally indicates that there is a * stale TLB entry for the faulting address. This happens when two or * more processes are sharing an L1. Since we don't flush the TLB on * a context switch between such processes, we can take domain faults * for mappings which exist at the same VA in both processes. EVEN IF * WE'VE RECENTLY FIXED UP THE CORRESPONDING L1 in pmap_enter(), for * example. * * This is extremely likely to happen if pmap_enter() updated the L1 * entry for a recently entered mapping. In this case, the TLB is * flushed for the new mapping, but there may still be TLB entries for * other mappings belonging to other processes in the 1MB range * covered by the L1 entry. * * Since 'rv == 0', we know that the L1 already contains the correct * value, so the fault must be due to a stale TLB entry. * * Since we always need to flush the TLB anyway in the case where we * fixed up the L1, or frobbed the L2 PTE, we effectively deal with * stale TLB entries dynamically. * * However, the above condition can ONLY happen if the current L1 is * being shared. If it happens when the L1 is unshared, it indicates * that other parts of the pmap are not doing their job WRT managing * the TLB. */ if (rv == 0 && pmap->pm_l1->l1_domain_use_count == 1) { printf("fixup: pmap %p, va 0x%08x, ftype %d - nothing to do!\n", pmap, va, ftype); printf("fixup: l2 %p, l2b %p, ptep %p, pl1pd %p\n", l2, l2b, ptep, pl1pd); printf("fixup: pte 0x%x, l1pd 0x%x, last code 0x%x\n", pte, l1pd, last_fault_code); #ifdef DDB Debugger(); #endif } #endif +l1_section_out: cpu_tlb_flushID_SE(va); cpu_cpwait(); rv = 1; out: rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (rv); } void pmap_postinit(void) { struct l2_bucket *l2b; struct l1_ttable *l1; pd_entry_t *pl1pt; pt_entry_t *ptep, pte; vm_offset_t va, eva; u_int loop, needed; needed = (maxproc / PMAP_DOMAINS) + ((maxproc % PMAP_DOMAINS) ? 1 : 0); needed -= 1; l1 = malloc(sizeof(*l1) * needed, M_VMPMAP, M_WAITOK); for (loop = 0; loop < needed; loop++, l1++) { /* Allocate a L1 page table */ va = (vm_offset_t)contigmalloc(L1_TABLE_SIZE, M_VMPMAP, 0, 0x0, 0xffffffff, L1_TABLE_SIZE, 0); if (va == 0) panic("Cannot allocate L1 KVM"); eva = va + L1_TABLE_SIZE; pl1pt = (pd_entry_t *)va; while (va < eva) { l2b = pmap_get_l2_bucket(pmap_kernel(), va); ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; *ptep = pte; PTE_SYNC(ptep); cpu_tlb_flushD_SE(va); va += PAGE_SIZE; } pmap_init_l1(l1, pl1pt); } #ifdef DEBUG printf("pmap_postinit: Allocated %d static L1 descriptor tables\n", needed); #endif } /* * This is used to stuff certain critical values into the PCB where they * can be accessed quickly from cpu_switch() et al. */ void pmap_set_pcb_pagedir(pmap_t pmap, struct pcb *pcb) { struct l2_bucket *l2b; pcb->pcb_pagedir = pmap->pm_l1->l1_physaddr; pcb->pcb_dacr = (DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | (DOMAIN_CLIENT << (pmap->pm_domain * 2)); if (vector_page < KERNBASE) { pcb->pcb_pl1vec = &pmap->pm_l1->l1_kva[L1_IDX(vector_page)]; l2b = pmap_get_l2_bucket(pmap, vector_page); pcb->pcb_l1vec = l2b->l2b_phys | L1_C_PROTO | L1_C_DOM(pmap->pm_domain) | L1_C_DOM(PMAP_DOMAIN_KERNEL); } else pcb->pcb_pl1vec = NULL; } void pmap_activate(struct thread *td) { pmap_t pmap; struct pcb *pcb; pmap = vmspace_pmap(td->td_proc->p_vmspace); pcb = td->td_pcb; critical_enter(); pmap_set_pcb_pagedir(pmap, pcb); if (td == curthread) { u_int cur_dacr, cur_ttb; __asm __volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(cur_ttb)); __asm __volatile("mrc p15, 0, %0, c3, c0, 0" : "=r"(cur_dacr)); cur_ttb &= ~(L1_TABLE_SIZE - 1); if (cur_ttb == (u_int)pcb->pcb_pagedir && cur_dacr == pcb->pcb_dacr) { /* * No need to switch address spaces. */ critical_exit(); return; } /* * We MUST, I repeat, MUST fix up the L1 entry corresponding * to 'vector_page' in the incoming L1 table before switching * to it otherwise subsequent interrupts/exceptions (including * domain faults!) will jump into hyperspace. */ if (pcb->pcb_pl1vec) { *pcb->pcb_pl1vec = pcb->pcb_l1vec; } cpu_domains(pcb->pcb_dacr); cpu_setttb(pcb->pcb_pagedir); } critical_exit(); } static int pmap_set_pt_cache_mode(pd_entry_t *kl1, vm_offset_t va) { pd_entry_t *pdep, pde; pt_entry_t *ptep, pte; vm_offset_t pa; int rv = 0; /* * Make sure the descriptor itself has the correct cache mode */ pdep = &kl1[L1_IDX(va)]; pde = *pdep; if (l1pte_section_p(pde)) { if ((pde & L1_S_CACHE_MASK) != pte_l1_s_cache_mode_pt) { *pdep = (pde & ~L1_S_CACHE_MASK) | pte_l1_s_cache_mode_pt; PTE_SYNC(pdep); rv = 1; } } else { pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); ptep = (pt_entry_t *)kernel_pt_lookup(pa); if (ptep == NULL) panic("pmap_bootstrap: No L2 for L2 @ va %p\n", ptep); ptep = &ptep[l2pte_index(va)]; pte = *ptep; if ((pte & L2_S_CACHE_MASK) != pte_l2_s_cache_mode_pt) { *ptep = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt; PTE_SYNC(ptep); rv = 1; } } return (rv); } static void pmap_alloc_specials(vm_offset_t *availp, int pages, vm_offset_t *vap, pt_entry_t **ptep) { vm_offset_t va = *availp; struct l2_bucket *l2b; if (ptep) { l2b = pmap_get_l2_bucket(pmap_kernel(), va); if (l2b == NULL) panic("pmap_alloc_specials: no l2b for 0x%x", va); *ptep = &l2b->l2b_kva[l2pte_index(va)]; } *vap = va; *availp = va + (PAGE_SIZE * pages); } /* * Bootstrap the system enough to run with virtual memory. * * On the arm this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ #define PMAP_STATIC_L2_SIZE 16 void pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt) { static struct l1_ttable static_l1; static struct l2_dtable static_l2[PMAP_STATIC_L2_SIZE]; struct l1_ttable *l1 = &static_l1; struct l2_dtable *l2; struct l2_bucket *l2b; pd_entry_t pde; pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va; pt_entry_t *ptep; vm_paddr_t pa; vm_offset_t va; vm_size_t size; int l1idx, l2idx, l2next = 0; PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n", firstaddr, vm_max_kernel_address)); virtual_avail = firstaddr; kernel_pmap->pm_l1 = l1; kernel_l1pa = l1pt->pv_pa; /* * Scan the L1 translation table created by initarm() and create * the required metadata for all valid mappings found in it. */ for (l1idx = 0; l1idx < (L1_TABLE_SIZE / sizeof(pd_entry_t)); l1idx++) { pde = kernel_l1pt[l1idx]; /* * We're only interested in Coarse mappings. * pmap_extract() can deal with section mappings without * recourse to checking L2 metadata. */ if ((pde & L1_TYPE_MASK) != L1_TYPE_C) continue; /* * Lookup the KVA of this L2 descriptor table */ pa = (vm_paddr_t)(pde & L1_C_ADDR_MASK); ptep = (pt_entry_t *)kernel_pt_lookup(pa); if (ptep == NULL) { panic("pmap_bootstrap: No L2 for va 0x%x, pa 0x%lx", (u_int)l1idx << L1_S_SHIFT, (long unsigned int)pa); } /* * Fetch the associated L2 metadata structure. * Allocate a new one if necessary. */ if ((l2 = kernel_pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { if (l2next == PMAP_STATIC_L2_SIZE) panic("pmap_bootstrap: out of static L2s"); kernel_pmap->pm_l2[L2_IDX(l1idx)] = l2 = &static_l2[l2next++]; } /* * One more L1 slot tracked... */ l2->l2_occupancy++; /* * Fill in the details of the L2 descriptor in the * appropriate bucket. */ l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; l2b->l2b_kva = ptep; l2b->l2b_phys = pa; l2b->l2b_l1idx = l1idx; /* * Establish an initial occupancy count for this descriptor */ for (l2idx = 0; l2idx < (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)); l2idx++) { if ((ptep[l2idx] & L2_TYPE_MASK) != L2_TYPE_INV) { l2b->l2b_occupancy++; } } /* * Make sure the descriptor itself has the correct cache mode. * If not, fix it, but whine about the problem. Port-meisters * should consider this a clue to fix up their initarm() * function. :) */ if (pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)ptep)) { printf("pmap_bootstrap: WARNING! wrong cache mode for " "L2 pte @ %p\n", ptep); } } /* * Ensure the primary (kernel) L1 has the correct cache mode for * a page table. Bitch if it is not correctly set. */ for (va = (vm_offset_t)kernel_l1pt; va < ((vm_offset_t)kernel_l1pt + L1_TABLE_SIZE); va += PAGE_SIZE) { if (pmap_set_pt_cache_mode(kernel_l1pt, va)) printf("pmap_bootstrap: WARNING! wrong cache mode for " "primary L1 @ 0x%x\n", va); } cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); cpu_tlb_flushID(); cpu_cpwait(); PMAP_LOCK_INIT(kernel_pmap); CPU_FILL(&kernel_pmap->pm_active); kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte); pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte); pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte); pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte); size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) / L1_S_SIZE; pmap_alloc_specials(&virtual_avail, round_page(size * L2_TABLE_SIZE_REAL) / PAGE_SIZE, &pmap_kernel_l2ptp_kva, NULL); size = (size + (L2_BUCKET_SIZE - 1)) / L2_BUCKET_SIZE; pmap_alloc_specials(&virtual_avail, round_page(size * sizeof(struct l2_dtable)) / PAGE_SIZE, &pmap_kernel_l2dtable_kva, NULL); pmap_alloc_specials(&virtual_avail, 1, (vm_offset_t*)&_tmppt, NULL); pmap_alloc_specials(&virtual_avail, MAXDUMPPGS, (vm_offset_t *)&crashdumpmap, NULL); SLIST_INIT(&l1_list); TAILQ_INIT(&l1_lru_list); mtx_init(&l1_lru_lock, "l1 list lock", NULL, MTX_DEF); pmap_init_l1(l1, kernel_l1pt); cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); virtual_avail = round_page(virtual_avail); virtual_end = vm_max_kernel_address; kernel_vm_end = pmap_curmaxkvaddr; arm_nocache_startaddr = vm_max_kernel_address; mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF); pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { struct pcb *pcb; cpu_idcache_wbinv_all(); cpu_l2cache_wbinv_all(); cpu_tlb_flushID(); cpu_cpwait(); if (vector_page < KERNBASE) { struct pcb *curpcb = PCPU_GET(curpcb); pcb = thread0.td_pcb; if (pmap_is_current(pmap)) { /* * Frob the L1 entry corresponding to the vector * page so that it contains the kernel pmap's domain * number. This will ensure pmap_remove() does not * pull the current vector page out from under us. */ critical_enter(); *pcb->pcb_pl1vec = pcb->pcb_l1vec; cpu_domains(pcb->pcb_dacr); cpu_setttb(pcb->pcb_pagedir); critical_exit(); } pmap_remove(pmap, vector_page, vector_page + PAGE_SIZE); /* * Make sure cpu_switch(), et al, DTRT. This is safe to do * since this process has no remaining mappings of its own. */ curpcb->pcb_pl1vec = pcb->pcb_pl1vec; curpcb->pcb_l1vec = pcb->pcb_l1vec; curpcb->pcb_dacr = pcb->pcb_dacr; curpcb->pcb_pagedir = pcb->pcb_pagedir; } pmap_free_l1(pmap); dprintf("pmap_release()\n"); } /* * Helper function for pmap_grow_l2_bucket() */ static __inline int pmap_grow_map(vm_offset_t va, pt_entry_t cache_mode, vm_paddr_t *pap) { struct l2_bucket *l2b; pt_entry_t *ptep; vm_paddr_t pa; struct vm_page *m; m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) return (1); pa = VM_PAGE_TO_PHYS(m); if (pap) *pap = pa; l2b = pmap_get_l2_bucket(pmap_kernel(), va); ptep = &l2b->l2b_kva[l2pte_index(va)]; *ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF; pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0); PTE_SYNC(ptep); return (0); } /* * This is the same as pmap_alloc_l2_bucket(), except that it is only * used by pmap_growkernel(). */ static __inline struct l2_bucket * pmap_grow_l2_bucket(pmap_t pmap, vm_offset_t va) { struct l2_dtable *l2; struct l2_bucket *l2b; struct l1_ttable *l1; pd_entry_t *pl1pd; u_short l1idx; vm_offset_t nva; l1idx = L1_IDX(va); if ((l2 = pmap->pm_l2[L2_IDX(l1idx)]) == NULL) { /* * No mapping at this address, as there is * no entry in the L1 table. * Need to allocate a new l2_dtable. */ nva = pmap_kernel_l2dtable_kva; if ((nva & PAGE_MASK) == 0) { /* * Need to allocate a backing page */ if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) return (NULL); } l2 = (struct l2_dtable *)nva; nva += sizeof(struct l2_dtable); if ((nva & PAGE_MASK) < (pmap_kernel_l2dtable_kva & PAGE_MASK)) { /* * The new l2_dtable straddles a page boundary. * Map in another page to cover it. */ if (pmap_grow_map(nva, pte_l2_s_cache_mode, NULL)) return (NULL); } pmap_kernel_l2dtable_kva = nva; /* * Link it into the parent pmap */ pmap->pm_l2[L2_IDX(l1idx)] = l2; memset(l2, 0, sizeof(*l2)); } l2b = &l2->l2_bucket[L2_BUCKET(l1idx)]; /* * Fetch pointer to the L2 page table associated with the address. */ if (l2b->l2b_kva == NULL) { pt_entry_t *ptep; /* * No L2 page table has been allocated. Chances are, this * is because we just allocated the l2_dtable, above. */ nva = pmap_kernel_l2ptp_kva; ptep = (pt_entry_t *)nva; if ((nva & PAGE_MASK) == 0) { /* * Need to allocate a backing page */ if (pmap_grow_map(nva, pte_l2_s_cache_mode_pt, &pmap_kernel_l2ptp_phys)) return (NULL); } memset(ptep, 0, L2_TABLE_SIZE_REAL); l2->l2_occupancy++; l2b->l2b_kva = ptep; l2b->l2b_l1idx = l1idx; l2b->l2b_phys = pmap_kernel_l2ptp_phys; pmap_kernel_l2ptp_kva += L2_TABLE_SIZE_REAL; pmap_kernel_l2ptp_phys += L2_TABLE_SIZE_REAL; } /* Distribute new L1 entry to all other L1s */ SLIST_FOREACH(l1, &l1_list, l1_link) { pl1pd = &l1->l1_kva[L1_IDX(va)]; *pl1pd = l2b->l2b_phys | L1_C_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; PTE_SYNC(pl1pd); } return (l2b); } /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { pmap_t kpmap = pmap_kernel(); if (addr <= pmap_curmaxkvaddr) return; /* we are OK */ /* * whoops! we need to add kernel PTPs */ /* Map 1MB at a time */ for (; pmap_curmaxkvaddr < addr; pmap_curmaxkvaddr += L1_S_SIZE) pmap_grow_l2_bucket(kpmap, pmap_curmaxkvaddr); /* * flush out the cache, expensive but growkernel will happen so * rarely */ cpu_dcache_wbinv_all(); cpu_l2cache_wbinv_all(); cpu_tlb_flushD(); cpu_cpwait(); kernel_vm_end = pmap_curmaxkvaddr; } +/* + * Returns TRUE if the given page is mapped individually or as part of + * a 1MB section. Otherwise, returns FALSE. + */ +boolean_t +pmap_page_is_mapped(vm_page_t m) +{ + boolean_t rv; + if ((m->oflags & VPO_UNMANAGED) != 0) + return (FALSE); + rw_wlock(&pvh_global_lock); + rv = !TAILQ_EMPTY(&m->md.pv_list) || + ((m->flags & PG_FICTITIOUS) == 0 && + !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); + rw_wunlock(&pvh_global_lock); + return (rv); +} + /* * Remove all pages from specified address space * this aids process exit speeds. Also, this code * is special cased for current process only, but * can have the more generic (and slightly slower) * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ void pmap_remove_pages(pmap_t pmap) { struct pv_entry *pv; struct l2_bucket *l2b = NULL; - vm_page_t m; - pt_entry_t *ptep; struct pv_chunk *pc, *npc; + struct md_page *pvh; + pd_entry_t *pl1pd, l1pd; + pt_entry_t *ptep; + vm_page_t m, mt; + vm_offset_t va; uint32_t inuse, bitmask; int allfree, bit, field, idx; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffs(inuse) - 1; bitmask = 1ul << bit; idx = field * sizeof(inuse) * NBBY + bit; pv = &pc->pc_pventry[idx]; + va = pv->pv_va; inuse &= ~bitmask; if (pv->pv_flags & PVF_WIRED) { /* Cannot remove wired pages now. */ allfree = 0; continue; } - l2b = pmap_get_l2_bucket(pmap, pv->pv_va); - KASSERT(l2b != NULL, - ("No L2 bucket in pmap_remove_pages")); - ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; - m = PHYS_TO_VM_PAGE(*ptep & L2_ADDR_MASK); - KASSERT((vm_offset_t)m >= KERNBASE, - ("Trying to access non-existent page " - "va %x pte %x", pv->pv_va, *ptep)); - *ptep = 0; - PTE_SYNC(ptep); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + l1pd = *pl1pd; + l2b = pmap_get_l2_bucket(pmap, va); + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + pvh = pa_to_pvh(l1pd & L1_S_FRAME); + TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + if (TAILQ_EMPTY(&pvh->pv_list)) { + m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); + KASSERT((vm_offset_t)m >= KERNBASE, + ("Trying to access non-existent page " + "va %x l1pd %x", trunc_1mpage(va), l1pd)); + for (mt = m; mt < &m[L2_PTE_NUM_TOTAL]; mt++) { + if (TAILQ_EMPTY(&mt->md.pv_list)) + vm_page_aflag_clear(mt, PGA_WRITEABLE); + } + } + if (l2b != NULL) { + KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, + ("pmap_remove_pages: l2_bucket occupancy error")); + pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); + } + pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; + *pl1pd = 0; + PTE_SYNC(pl1pd); + } else { + KASSERT(l2b != NULL, + ("No L2 bucket in pmap_remove_pages")); + ptep = &l2b->l2b_kva[l2pte_index(va)]; + m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); + KASSERT((vm_offset_t)m >= KERNBASE, + ("Trying to access non-existent page " + "va %x pte %x", va, *ptep)); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_EMPTY(&m->md.pv_list) && + (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(l2pte_pa(*ptep)); + if (TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + *ptep = 0; + PTE_SYNC(ptep); + pmap_free_l2_bucket(pmap, l2b, 1); + pmap->pm_stats.resident_count--; + } /* Mark free */ PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; - pmap->pm_stats.resident_count--; pc->pc_map[field] |= bitmask; - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); - pmap_free_l2_bucket(pmap, l2b, 1); } } if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); pmap_free_pv_chunk(pc); } } rw_wunlock(&pvh_global_lock); cpu_tlb_flushID(); cpu_cpwait(); PMAP_UNLOCK(pmap); } /*************************************************** * Low level mapping routines..... ***************************************************/ #ifdef ARM_HAVE_SUPERSECTIONS /* Map a super section into the KVA. */ void pmap_kenter_supersection(vm_offset_t va, uint64_t pa, int flags) { pd_entry_t pd = L1_S_PROTO | L1_S_SUPERSEC | (pa & L1_SUP_FRAME) | (((pa >> 32) & 0xf) << 20) | L1_S_PROT(PTE_KERNEL, - VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL); + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | + L1_S_DOM(PMAP_DOMAIN_KERNEL); struct l1_ttable *l1; vm_offset_t va0, va_end; KASSERT(((va | pa) & L1_SUP_OFFSET) == 0, ("Not a valid super section mapping")); if (flags & SECTION_CACHE) pd |= pte_l1_s_cache_mode; else if (flags & SECTION_PT) pd |= pte_l1_s_cache_mode_pt; va0 = va & L1_SUP_FRAME; va_end = va + L1_SUP_SIZE; SLIST_FOREACH(l1, &l1_list, l1_link) { va = va0; for (; va < va_end; va += L1_S_SIZE) { l1->l1_kva[L1_IDX(va)] = pd; PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); } } } #endif /* Map a section into the KVA. */ void pmap_kenter_section(vm_offset_t va, vm_offset_t pa, int flags) { pd_entry_t pd = L1_S_PROTO | pa | L1_S_PROT(PTE_KERNEL, - VM_PROT_READ|VM_PROT_WRITE) | L1_S_DOM(PMAP_DOMAIN_KERNEL); + VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE) | L1_S_REF | + L1_S_DOM(PMAP_DOMAIN_KERNEL); struct l1_ttable *l1; KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("Not a valid section mapping")); if (flags & SECTION_CACHE) pd |= pte_l1_s_cache_mode; else if (flags & SECTION_PT) pd |= pte_l1_s_cache_mode_pt; SLIST_FOREACH(l1, &l1_list, l1_link) { l1->l1_kva[L1_IDX(va)] = pd; PTE_SYNC(&l1->l1_kva[L1_IDX(va)]); } } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temp(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); return ((void *)crashdumpmap); } /* * add a wired page to the kva * note that in order for the mapping to take effect -- you * should do a invltlb after doing the pmap_kenter... */ static PMAP_INLINE void pmap_kenter_internal(vm_offset_t va, vm_offset_t pa, int flags) { struct l2_bucket *l2b; pt_entry_t *ptep; pt_entry_t opte; PDEBUG(1, printf("pmap_kenter: va = %08x, pa = %08x\n", (uint32_t) va, (uint32_t) pa)); l2b = pmap_get_l2_bucket(pmap_kernel(), va); if (l2b == NULL) l2b = pmap_grow_l2_bucket(pmap_kernel(), va); KASSERT(l2b != NULL, ("No L2 Bucket")); ptep = &l2b->l2b_kva[l2pte_index(va)]; opte = *ptep; if (l2pte_valid(opte)) { cpu_tlb_flushD_SE(va); cpu_cpwait(); } else { if (opte == 0) l2b->l2b_occupancy++; } if (flags & KENTER_CACHE) { *ptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, flags & KENTER_USER); } else { *ptep = L2_S_PROTO | pa | L2_S_REF; pmap_set_prot(ptep, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, 0); } PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n", (uint32_t) ptep, opte, *ptep)); PTE_SYNC(ptep); cpu_cpwait(); } void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_internal(va, pa, KENTER_CACHE); } void pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_internal(va, pa, 0); } void pmap_kenter_user(vm_offset_t va, vm_paddr_t pa) { pmap_kenter_internal(va, pa, KENTER_CACHE|KENTER_USER); /* * Call pmap_fault_fixup now, to make sure we'll have no exception * at the first use of the new address, or bad things will happen, * as we use one of these addresses in the exception handlers. */ pmap_fault_fixup(pmap_kernel(), va, VM_PROT_READ|VM_PROT_WRITE, 1); } vm_paddr_t pmap_kextract(vm_offset_t va) { return (pmap_extract_locked(kernel_pmap, va)); } /* * remove a page from the kernel pagetables */ void pmap_kremove(vm_offset_t va) { struct l2_bucket *l2b; pt_entry_t *ptep, opte; l2b = pmap_get_l2_bucket(pmap_kernel(), va); if (!l2b) return; KASSERT(l2b != NULL, ("No L2 Bucket")); ptep = &l2b->l2b_kva[l2pte_index(va)]; opte = *ptep; if (l2pte_valid(opte)) { va = va & ~PAGE_MASK; cpu_tlb_flushD_SE(va); cpu_cpwait(); *ptep = 0; PTE_SYNC(ptep); } } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { vm_offset_t sva = *virt; vm_offset_t va = sva; PDEBUG(1, printf("pmap_map: virt = %08x, start = %08x, end = %08x, " "prot = %d\n", (uint32_t) *virt, (uint32_t) start, (uint32_t) end, prot)); while (start < end) { pmap_kenter(va, start); va += PAGE_SIZE; start += PAGE_SIZE; } *virt = va; return (sva); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. */ void pmap_qenter(vm_offset_t va, vm_page_t *m, int count) { int i; for (i = 0; i < count; i++) { pmap_kenter_internal(va, VM_PAGE_TO_PHYS(m[i]), KENTER_CACHE); va += PAGE_SIZE; } } /* * this routine jerks page mappings from the * kernel -- it is meant only for temporary mappings. */ void pmap_qremove(vm_offset_t va, int count) { int i; for (i = 0; i < count; i++) { if (vtophys(va)) pmap_kremove(va); va += PAGE_SIZE; } } /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft * faults on process startup and immediately after an mmap. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is elgible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pdep; pt_entry_t *ptep; if (!pmap_get_pde_pte(pmap, addr, &pdep, &ptep)) return (FALSE); - KASSERT(ptep != NULL, ("Valid mapping but no pte ?")); - if (*ptep == 0) - return (TRUE); + KASSERT((pdep != NULL && (l1pte_section_p(*pdep) || ptep != NULL)), + ("Valid mapping but no pte ?")); + if (*pdep != 0 && !l1pte_section_p(*pdep)) + if (*ptep == 0) + return (TRUE); return (FALSE); } /* * Fetch pointers to the PDE/PTE for the given pmap/VA pair. * Returns TRUE if the mapping exists, else FALSE. * * NOTE: This function is only used by a couple of arm-specific modules. * It is not safe to take any pmap locks here, since we could be right * in the middle of debugging the pmap anyway... * * It is possible for this routine to return FALSE even though a valid * mapping does exist. This is because we don't lock, so the metadata * state may be inconsistent. * * NOTE: We can return a NULL *ptp in the case where the L1 pde is * a "section" mapping. */ boolean_t pmap_get_pde_pte(pmap_t pmap, vm_offset_t va, pd_entry_t **pdp, pt_entry_t **ptp) { struct l2_dtable *l2; pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep; u_short l1idx; if (pmap->pm_l1 == NULL) return (FALSE); l1idx = L1_IDX(va); *pdp = pl1pd = &pmap->pm_l1->l1_kva[l1idx]; l1pd = *pl1pd; if (l1pte_section_p(l1pd)) { *ptp = NULL; return (TRUE); } if (pmap->pm_l2 == NULL) return (FALSE); l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { return (FALSE); } *ptp = &ptep[l2pte_index(va)]; return (TRUE); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { + struct md_page *pvh; pv_entry_t pv; pmap_t pmap; pt_entry_t *ptep; struct l2_bucket *l2b; boolean_t flush = FALSE; pmap_t curpmap; u_int is_exec = 0; - KASSERT((m->flags & PG_FICTITIOUS) == 0, - ("pmap_remove_all: page %p is fictitious", m)); - - if (TAILQ_EMPTY(&m->md.pv_list)) - return; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_remove_all: page %p is not managed", m)); rw_wlock(&pvh_global_lock); + if ((m->flags & PG_FICTITIOUS) != 0) + goto small_mappings; + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pd_entry_t *pl1pd; + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; + KASSERT((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO, + ("pmap_remove_all: valid section mapping expected")); + (void)pmap_demote_section(pmap, pv->pv_va); + PMAP_UNLOCK(pmap); + } +small_mappings: curpmap = vmspace_pmap(curproc->p_vmspace); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (flush == FALSE && (pmap == curpmap || pmap == pmap_kernel())) flush = TRUE; PMAP_LOCK(pmap); l2b = pmap_get_l2_bucket(pmap, pv->pv_va); KASSERT(l2b != NULL, ("No l2 bucket")); ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; is_exec |= PTE_BEEN_EXECD(*ptep); *ptep = 0; if (pmap_is_current(pmap)) PTE_SYNC(ptep); pmap_free_l2_bucket(pmap, l2b, 1); pmap->pm_stats.resident_count--; TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (pv->pv_flags & PVF_WIRED) pmap->pm_stats.wired_count--; pmap_free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } if (flush) { if (is_exec) cpu_tlb_flushID(); else cpu_tlb_flushD(); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); } int pmap_change_attr(vm_offset_t sva, vm_size_t len, int mode) { vm_offset_t base, offset, tmpva; vm_size_t size; struct l2_bucket *l2b; pt_entry_t *ptep, pte; vm_offset_t next_bucket; PMAP_LOCK(kernel_pmap); base = trunc_page(sva); offset = sva & PAGE_MASK; size = roundup(offset + len, PAGE_SIZE); #ifdef checkit /* * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ if (base < DMAP_MIN_ADDRESS) { PMAP_UNLOCK(kernel_pmap); return (EINVAL); } #endif for (tmpva = base; tmpva < base + size; ) { next_bucket = L2_NEXT_BUCKET(tmpva); if (next_bucket > base + size) next_bucket = base + size; l2b = pmap_get_l2_bucket(kernel_pmap, tmpva); if (l2b == NULL) { tmpva = next_bucket; continue; } ptep = &l2b->l2b_kva[l2pte_index(tmpva)]; if (*ptep == 0) { PMAP_UNLOCK(kernel_pmap); return(EINVAL); } pte = *ptep &~ L2_S_CACHE_MASK; cpu_idcache_wbinv_range(tmpva, PAGE_SIZE); pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE); *ptep = pte; cpu_tlb_flushID_SE(tmpva); dprintf("%s: for va:%x ptep:%x pte:%x\n", __func__, tmpva, (uint32_t)ptep, pte); tmpva += PAGE_SIZE; } PMAP_UNLOCK(kernel_pmap); return (0); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct l2_bucket *l2b; + struct md_page *pvh; + struct pv_entry *pve; + pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep, pte; vm_offset_t next_bucket; u_int is_exec, is_refd; int flush; if ((prot & VM_PROT_READ) == 0) { pmap_remove(pmap, sva, eva); return; } if (prot & VM_PROT_WRITE) { /* * If this is a read->write transition, just ignore it and let * vm_fault() take care of it later. */ return; } rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); /* * OK, at this point, we know we're doing write-protect operation. * If the pmap is active, write-back the range. */ flush = ((eva - sva) >= (PAGE_SIZE * 4)) ? 0 : -1; is_exec = is_refd = 0; while (sva < eva) { next_bucket = L2_NEXT_BUCKET(sva); + /* + * Check for large page. + */ + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; + l1pd = *pl1pd; + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + KASSERT(pmap != pmap_kernel(), + ("pmap_protect: trying to modify " + "kernel section protections")); + /* + * Are we protecting the entire large page? If not, + * demote the mapping and fall through. + */ + if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) && + eva >= L2_NEXT_BUCKET(sva)) { + l1pd &= ~(L1_S_PROT_MASK | L1_S_XN); + if (!(prot & VM_PROT_EXECUTE)) + *pl1pd |= L1_S_XN; + /* + * At this point we are always setting + * write-protect bit. + */ + l1pd |= L1_S_APX; + /* All managed superpages are user pages. */ + l1pd |= L1_S_PROT_U; + *pl1pd = l1pd; + PTE_SYNC(pl1pd); + pvh = pa_to_pvh(l1pd & L1_S_FRAME); + pve = pmap_find_pv(pvh, pmap, + trunc_1mpage(sva)); + pve->pv_flags &= ~PVF_WRITE; + sva = next_bucket; + continue; + } else if (!pmap_demote_section(pmap, sva)) { + /* The large page mapping was destroyed. */ + sva = next_bucket; + continue; + } + } if (next_bucket > eva) next_bucket = eva; - l2b = pmap_get_l2_bucket(pmap, sva); if (l2b == NULL) { sva = next_bucket; continue; } ptep = &l2b->l2b_kva[l2pte_index(sva)]; while (sva < next_bucket) { if ((pte = *ptep) != 0 && L2_S_WRITABLE(pte)) { struct vm_page *m; m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); pmap_set_prot(ptep, prot, !(pmap == pmap_kernel())); PTE_SYNC(ptep); pmap_modify_pv(m, pmap, sva, PVF_WRITE, 0); if (flush >= 0) { flush++; is_exec |= PTE_BEEN_EXECD(pte); is_refd |= PTE_BEEN_REFD(pte); } else { if (PTE_BEEN_EXECD(pte)) cpu_tlb_flushID_SE(sva); else if (PTE_BEEN_REFD(pte)) cpu_tlb_flushD_SE(sva); } } sva += PAGE_SIZE; ptep++; } } if (flush) { if (is_exec) cpu_tlb_flushID(); else if (is_refd) cpu_tlb_flushD(); } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ void pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired) { rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); pmap_enter_locked(pmap, va, access, m, prot, wired, M_WAITOK); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } /* * The pvh global and pmap locks must be held. */ static void pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, vm_prot_t prot, boolean_t wired, int flags) { struct l2_bucket *l2b = NULL; struct vm_page *om; struct pv_entry *pve = NULL; + pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep, npte, opte; u_int nflags; u_int is_exec, is_refd; vm_paddr_t pa; u_char user; PMAP_ASSERT_LOCKED(pmap); rw_assert(&pvh_global_lock, RA_WLOCKED); if (va == vector_page) { pa = systempage.pv_pa; m = NULL; } else { KASSERT((m->oflags & VPO_UNMANAGED) != 0 || vm_page_xbusied(m) || (flags & M_NOWAIT) != 0, ("pmap_enter_locked: page %p is not busy", m)); pa = VM_PAGE_TO_PHYS(m); } + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) + panic("pmap_enter_locked: attempt pmap_enter_on 1MB page"); + user = 0; /* * Make sure userland mappings get the right permissions */ if (pmap != pmap_kernel() && va != vector_page) user = 1; nflags = 0; if (prot & VM_PROT_WRITE) nflags |= PVF_WRITE; if (wired) nflags |= PVF_WIRED; PDEBUG(1, printf("pmap_enter: pmap = %08x, va = %08x, m = %08x, " "prot = %x, wired = %x\n", (uint32_t) pmap, va, (uint32_t) m, prot, wired)); if (pmap == pmap_kernel()) { l2b = pmap_get_l2_bucket(pmap, va); if (l2b == NULL) l2b = pmap_grow_l2_bucket(pmap, va); } else { do_l2b_alloc: l2b = pmap_alloc_l2_bucket(pmap, va); if (l2b == NULL) { if (flags & M_WAITOK) { PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); VM_WAIT; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); goto do_l2b_alloc; } return; } } ptep = &l2b->l2b_kva[l2pte_index(va)]; opte = *ptep; npte = pa; is_exec = is_refd = 0; if (opte) { if (l2pte_pa(opte) == pa) { /* * We're changing the attrs of an existing mapping. */ if (m != NULL) pmap_modify_pv(m, pmap, va, PVF_WRITE | PVF_WIRED, nflags); is_exec |= PTE_BEEN_EXECD(opte); is_refd |= PTE_BEEN_REFD(opte); goto validate; } if ((om = PHYS_TO_VM_PAGE(l2pte_pa(opte)))) { /* * Replacing an existing mapping with a new one. * It is part of our managed memory so we * must remove it from the PV list */ if ((pve = pmap_remove_pv(om, pmap, va))) { is_exec |= PTE_BEEN_EXECD(opte); is_refd |= PTE_BEEN_REFD(opte); if (m && ((m->oflags & VPO_UNMANAGED))) pmap_free_pv_entry(pmap, pve); } } } else { /* * Keep the stats up to date */ l2b->l2b_occupancy++; pmap->pm_stats.resident_count++; } /* * Enter on the PV list if part of our managed memory. */ if ((m && !(m->oflags & VPO_UNMANAGED))) { if ((!pve) && (pve = pmap_get_pv_entry(pmap, FALSE)) == NULL) panic("pmap_enter: no pv entries"); KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); KASSERT(pve != NULL, ("No pv")); pmap_enter_pv(m, pve, pmap, va, nflags); } validate: /* Make the new PTE valid */ npte |= L2_S_PROTO; #ifdef SMP npte |= L2_SHARED; #endif /* Set defaults first - kernel read access */ npte |= L2_APX; npte |= L2_S_PROT_R; /* Set "referenced" flag */ npte |= L2_S_REF; /* Now tune APs as desired */ if (user) npte |= L2_S_PROT_U; /* * If this is not a vector_page * then continue setting mapping parameters */ if (m != NULL) { if (prot & (VM_PROT_ALL)) { if ((m->oflags & VPO_UNMANAGED) == 0) vm_page_aflag_set(m, PGA_REFERENCED); } else { /* * Need to do page referenced emulation. */ npte &= ~L2_S_REF; } if (prot & VM_PROT_WRITE) { /* * Enable write permission if the access type * indicates write intention. Emulate modified * bit otherwise. */ if ((access & VM_PROT_WRITE) != 0) npte &= ~(L2_APX); if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_aflag_set(m, PGA_WRITEABLE); /* * The access type and permissions indicate * that the page will be written as soon as * returned from fault service. * Mark it dirty from the outset. */ if ((access & VM_PROT_WRITE) != 0) vm_page_dirty(m); } } if (!(prot & VM_PROT_EXECUTE)) npte |= L2_XN; if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) npte |= pte_l2_s_cache_mode; } CTR5(KTR_PMAP,"enter: pmap:%p va:%x prot:%x pte:%x->%x", pmap, va, prot, opte, npte); /* * If this is just a wiring change, the two PTEs will be * identical, so there's no need to update the page table. */ if (npte != opte) { boolean_t is_cached = pmap_is_current(pmap); *ptep = npte; PTE_SYNC(ptep); if (is_cached) { /* * We only need to frob the cache/tlb if this pmap * is current */ if (L1_IDX(va) != L1_IDX(vector_page) && l2pte_valid(npte)) { /* * This mapping is likely to be accessed as * soon as we return to userland. Fix up the * L1 entry to avoid taking another * page/domain fault. */ - pd_entry_t *pl1pd, l1pd; - - pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; if (*pl1pd != l1pd) { *pl1pd = l1pd; PTE_SYNC(pl1pd); } } } if (is_exec) cpu_tlb_flushID_SE(va); else if (is_refd) cpu_tlb_flushD_SE(va); } if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); + /* + * If both the l2b_occupancy and the reservation are fully + * populated, then attempt promotion. + */ + if ((l2b->l2b_occupancy == L2_PTE_NUM_TOTAL) && + sp_enabled && (m->flags & PG_FICTITIOUS) == 0 && + vm_reserv_level_iffullpop(m) == 0) + pmap_promote_section(pmap, va); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { + vm_offset_t va; vm_page_t m; vm_pindex_t diff, psize; vm_prot_t access; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); m = m_start; access = prot = prot & (VM_PROT_READ | VM_PROT_EXECUTE); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - pmap_enter_locked(pmap, start + ptoa(diff), access, m, prot, - FALSE, M_NOWAIT); + va = start + ptoa(diff); + if ((va & L1_S_OFFSET) == 0 && L2_NEXT_BUCKET(va) <= end && + (VM_PAGE_TO_PHYS(m) & L1_S_OFFSET) == 0 && + sp_enabled && vm_reserv_level_iffullpop(m) == 0 && + pmap_enter_section(pmap, va, m, prot)) + m = &m[L1_S_SIZE / PAGE_SIZE - 1]; + else + pmap_enter_locked(pmap, va, access, m, prot, + FALSE, M_NOWAIT); m = TAILQ_NEXT(m, listq); } PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { vm_prot_t access; access = prot = prot & (VM_PROT_READ | VM_PROT_EXECUTE); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); pmap_enter_locked(pmap, va, access, m, prot, FALSE, M_NOWAIT); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); } /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. * In/out conditions: * The mapping must already exist in the pmap. */ void pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) { struct l2_bucket *l2b; + struct md_page *pvh; + struct pv_entry *pve; + pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep, pte; vm_page_t m; rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + l1pd = *pl1pd; + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); + KASSERT((m != NULL) && ((m->oflags & VPO_UNMANAGED) == 0), + ("pmap_change_wiring: unmanaged superpage should not " + "be changed")); + KASSERT(pmap != pmap_kernel(), + ("pmap_change_wiring: managed kernel superpage " + "should not exist")); + pvh = pa_to_pvh(l1pd & L1_S_FRAME); + pve = pmap_find_pv(pvh, pmap, trunc_1mpage(va)); + if (!wired != ((pve->pv_flags & PVF_WIRED) == 0)) { + if (!pmap_demote_section(pmap, va)) + panic("pmap_change_wiring: demotion failed"); + } else + goto out; + } l2b = pmap_get_l2_bucket(pmap, va); KASSERT(l2b, ("No l2b bucket in pmap_change_wiring")); ptep = &l2b->l2b_kva[l2pte_index(va)]; pte = *ptep; m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); if (m != NULL) pmap_modify_pv(m, pmap, va, PVF_WIRED, wired == TRUE ? PVF_WIRED : 0); +out: rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { vm_paddr_t pa; PMAP_LOCK(pmap); pa = pmap_extract_locked(pmap, va); PMAP_UNLOCK(pmap); return (pa); } static vm_paddr_t pmap_extract_locked(pmap_t pmap, vm_offset_t va) { struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa; u_int l1idx; if (pmap != kernel_pmap) PMAP_ASSERT_LOCKED(pmap); l1idx = L1_IDX(va); l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* * These should only happen for the kernel pmap. */ KASSERT(pmap == kernel_pmap, ("unexpected section")); /* XXX: what to do about the bits > 32 ? */ if (l1pd & L1_S_SUPERSEC) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); } else { /* * Note that we can't rely on the validity of the L1 * descriptor as an indication that a mapping exists. * We have to look it up in the L2 dtable. */ l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) return (0); pte = ptep[l2pte_index(va)]; if (pte == 0) return (0); switch (pte & L2_TYPE_MASK) { case L2_TYPE_L: pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); break; default: pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); break; } } return (pa); } /* * Atomically extract and hold the physical page with the given * pmap and virtual address pair if that mapping permits the given * protection. * */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa, paddr; vm_page_t m = NULL; u_int l1idx; l1idx = L1_IDX(va); paddr = 0; PMAP_LOCK(pmap); retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { - /* - * These should only happen for pmap_kernel() - */ - KASSERT(pmap == pmap_kernel(), ("huh")); /* XXX: what to do about the bits > 32 ? */ if (l1pd & L1_S_SUPERSEC) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) goto retry; if (L1_S_WRITABLE(l1pd) || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } else { /* * Note that we can't rely on the validity of the L1 * descriptor as an indication that a mapping exists. * We have to look it up in the L2 dtable. */ l2 = pmap->pm_l2[L2_IDX(l1idx)]; if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { PMAP_UNLOCK(pmap); return (NULL); } ptep = &ptep[l2pte_index(va)]; pte = *ptep; if (pte == 0) { PMAP_UNLOCK(pmap); return (NULL); } else if ((prot & VM_PROT_WRITE) && (pte & L2_APX)) { PMAP_UNLOCK(pmap); return (NULL); } else { switch (pte & L2_TYPE_MASK) { case L2_TYPE_L: panic("extract and hold section mapping"); break; default: pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); break; } if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PMAP_UNLOCK(pmap); PA_UNLOCK_COND(paddr); return (m); } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit(pmap_t pmap) { PDEBUG(1, printf("pmap_pinit: pmap = %08x\n", (uint32_t) pmap)); pmap_alloc_l1(pmap); bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_stats.resident_count = 1; if (vector_page < KERNBASE) { pmap_enter(pmap, vector_page, VM_PROT_READ, PHYS_TO_VM_PAGE(systempage.pv_pa), VM_PROT_READ, 1); } return (1); } /*************************************************** + * Superpage management routines. + ***************************************************/ + +static PMAP_INLINE struct pv_entry * +pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + + pv = pmap_find_pv(pvh, pmap, va); + if (pv != NULL) + TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); + + return (pv); +} + +static void +pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) +{ + pv_entry_t pv; + + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); + pmap_free_pv_entry(pmap, pv); +} + +static boolean_t +pmap_pv_insert_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) +{ + struct md_page *pvh; + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + if (pv_entry_count < pv_entry_high_water && + (pv = pmap_get_pv_entry(pmap, TRUE)) != NULL) { + pv->pv_va = va; + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + return (TRUE); + } else + return (FALSE); +} + +/* + * Create the pv entries for each of the pages within a superpage. + */ +static void +pmap_pv_demote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) +{ + struct md_page *pvh; + pv_entry_t pve, pv; + vm_offset_t va_last; + vm_page_t m; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + KASSERT((pa & L1_S_OFFSET) == 0, + ("pmap_pv_demote_section: pa is not 1mpage aligned")); + + /* + * Transfer the 1mpage's pv entry for this mapping to the first + * page's pv list. + */ + pvh = pa_to_pvh(pa); + va = trunc_1mpage(va); + pv = pmap_pvh_remove(pvh, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_demote_section: pv not found")); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_INSERT_HEAD(&m->md.pv_list, pv, pv_list); + /* Instantiate the remaining pv entries. */ + va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; + do { + m++; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_pv_demote_section: page %p is not managed", m)); + va += PAGE_SIZE; + pve = pmap_get_pv_entry(pmap, FALSE); + pmap_enter_pv(m, pve, pmap, va, pv->pv_flags); + } while (va < va_last); +} + +static void +pmap_pv_promote_section(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) +{ + struct md_page *pvh; + pv_entry_t pv; + vm_offset_t va_last; + vm_page_t m; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + KASSERT((pa & L1_S_OFFSET) == 0, + ("pmap_pv_promote_section: pa is not 1mpage aligned")); + + /* + * Transfer the first page's pv entry for this mapping to the + * 1mpage's pv list. Aside from avoiding the cost of a call + * to get_pv_entry(), a transfer avoids the possibility that + * get_pv_entry() calls pmap_pv_reclaim() and that pmap_pv_reclaim() + * removes one of the mappings that is being promoted. + */ + m = PHYS_TO_VM_PAGE(pa); + va = trunc_1mpage(va); + pv = pmap_pvh_remove(&m->md, pmap, va); + KASSERT(pv != NULL, ("pmap_pv_promote_section: pv not found")); + pvh = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); + /* Free the remaining pv entries in the newly mapped section pages */ + va_last = L2_NEXT_BUCKET(va) - PAGE_SIZE; + do { + m++; + va += PAGE_SIZE; + /* + * Don't care the flags, first pv contains sufficient + * information for all of the pages so nothing is really lost. + */ + pmap_pvh_free(&m->md, pmap, va); + } while (va < va_last); +} + +/* + * Tries to create a 1MB page mapping. Returns TRUE if successful and + * FALSE otherwise. Fails if (1) page is unmanageg, kernel pmap or vectors + * page, (2) a mapping already exists at the specified virtual address, or + * (3) a pv entry cannot be allocated without reclaiming another pv entry. + */ +static boolean_t +pmap_enter_section(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + pd_entry_t *pl1pd; + vm_offset_t pa; + struct l2_bucket *l2b; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_ASSERT_LOCKED(pmap); + + /* Skip kernel, vectors page and unmanaged mappings */ + if ((pmap == pmap_kernel()) || (L1_IDX(va) == L1_IDX(vector_page)) || + ((m->oflags & VPO_UNMANAGED) != 0)) { + CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" + " in pmap %p", va, pmap); + return (FALSE); + } + /* + * Check whether this is a valid section superpage entry or + * there is a l2_bucket associated with that L1 page directory. + */ + va = trunc_1mpage(va); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + l2b = pmap_get_l2_bucket(pmap, va); + if ((*pl1pd & L1_S_PROTO) || (l2b != NULL)) { + CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" + " in pmap %p", va, pmap); + return (FALSE); + } + pa = VM_PAGE_TO_PHYS(m); + /* + * Abort this mapping if its PV entry could not be created. + */ + if (!pmap_pv_insert_section(pmap, va, VM_PAGE_TO_PHYS(m))) { + CTR2(KTR_PMAP, "pmap_enter_section: failure for va %#lx" + " in pmap %p", va, pmap); + return (FALSE); + } + /* + * Increment counters. + */ + pmap->pm_stats.resident_count += L2_PTE_NUM_TOTAL; + /* + * Despite permissions, mark the superpage read-only. + */ + prot &= ~VM_PROT_WRITE; + /* + * Map the superpage. + */ + pmap_map_section(pmap, va, pa, prot, FALSE); + + pmap_section_mappings++; + CTR2(KTR_PMAP, "pmap_enter_section: success for va %#lx" + " in pmap %p", va, pmap); + return (TRUE); +} + +/* + * pmap_remove_section: do the things to unmap a superpage in a process + */ +static void +pmap_remove_section(pmap_t pmap, vm_offset_t sva) +{ + struct md_page *pvh; + struct l2_bucket *l2b; + pd_entry_t *pl1pd, l1pd; + vm_offset_t eva, va; + vm_page_t m; + + PMAP_ASSERT_LOCKED(pmap); + if ((pmap == pmap_kernel()) || (L1_IDX(sva) == L1_IDX(vector_page))) + return; + + KASSERT((sva & L1_S_OFFSET) == 0, + ("pmap_remove_section: sva is not 1mpage aligned")); + + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; + l1pd = *pl1pd; + + m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); + KASSERT((m != NULL && ((m->oflags & VPO_UNMANAGED) == 0)), + ("pmap_remove_section: no corresponding vm_page or " + "page unmanaged")); + + pmap->pm_stats.resident_count -= L2_PTE_NUM_TOTAL; + pvh = pa_to_pvh(l1pd & L1_S_FRAME); + pmap_pvh_free(pvh, pmap, sva); + eva = L2_NEXT_BUCKET(sva); + for (va = sva, m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); + va < eva; va += PAGE_SIZE, m++) { + /* + * Mark base pages referenced but skip marking them dirty. + * If the superpage is writeable, hence all base pages were + * already marked as dirty in pmap_fault_fixup() before + * promotion. Reference bit however, might not have been set + * for each base page when the superpage was created at once, + * not as a result of promotion. + */ + if (L1_S_REFERENCED(l1pd)) + vm_page_aflag_set(m, PGA_REFERENCED); + if (TAILQ_EMPTY(&m->md.pv_list) && + TAILQ_EMPTY(&pvh->pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + + l2b = pmap_get_l2_bucket(pmap, sva); + if (l2b != NULL) { + KASSERT(l2b->l2b_occupancy == L2_PTE_NUM_TOTAL, + ("pmap_remove_section: l2_bucket occupancy error")); + pmap_free_l2_bucket(pmap, l2b, L2_PTE_NUM_TOTAL); + /* + * Now invalidate L1 slot as it was not invalidated in + * pmap_free_l2_bucket() due to L1_TYPE mismatch. + */ + *pl1pd = 0; + PTE_SYNC(pl1pd); + } +} + +/* + * Tries to promote the 256, contiguous 4KB page mappings that are + * within a single l2_bucket to a single 1MB section mapping. + * For promotion to occur, two conditions must be met: (1) the 4KB page + * mappings must map aligned, contiguous physical memory and (2) the 4KB page + * mappings must have identical characteristics. + */ +static void +pmap_promote_section(pmap_t pmap, vm_offset_t va) +{ + pt_entry_t *firstptep, firstpte, oldpte, pa, *pte; + vm_page_t m, oldm; + vm_offset_t first_va, old_va; + struct l2_bucket *l2b = NULL; + vm_prot_t prot; + struct pv_entry *pve, *first_pve; + + PMAP_ASSERT_LOCKED(pmap); + + prot = VM_PROT_ALL; + /* + * Skip promoting kernel pages. This is justified by following: + * 1. Kernel is already mapped using section mappings in each pmap + * 2. Managed mappings within the kernel are not to be promoted anyway + */ + if (pmap == pmap_kernel()) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" + " in pmap %p", va, pmap); + return; + } + /* Do not attemp to promote vectors pages */ + if (L1_IDX(va) == L1_IDX(vector_page)) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" + " in pmap %p", va, pmap); + return; + } + /* + * Examine the first PTE in the specified l2_bucket. Abort if this PTE + * is either invalid, unused, or does not map the first 4KB physical + * page within 1MB page. + */ + first_va = trunc_1mpage(va); + l2b = pmap_get_l2_bucket(pmap, first_va); + KASSERT(l2b != NULL, ("pmap_promote_section: trying to promote " + "not existing l2 bucket")); + firstptep = &l2b->l2b_kva[0]; + + firstpte = *firstptep; + if ((l2pte_pa(firstpte) & L1_S_OFFSET) != 0) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" + " in pmap %p", va, pmap); + return; + } + + if ((firstpte & (L2_S_PROTO | L2_S_REF)) != (L2_S_PROTO | L2_S_REF)) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" + " in pmap %p", va, pmap); + return; + } + /* + * ARM uses pv_entry to mark particular mapping WIRED so don't promote + * unmanaged pages since it is impossible to determine, whether the + * page is wired or not if there is no corresponding pv_entry. + */ + m = PHYS_TO_VM_PAGE(l2pte_pa(firstpte)); + if (m && ((m->oflags & VPO_UNMANAGED) != 0)) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for va %#x" + " in pmap %p", va, pmap); + return; + } + first_pve = pmap_find_pv(&m->md, pmap, first_va); + /* + * PTE is modified only on write due to modified bit + * emulation. If the entry is referenced and writable + * then it is modified and we don't clear write enable. + * Otherwise, writing is disabled in PTE anyway and + * we just configure protections for the section mapping + * that is going to be created. + */ + if (!L2_S_WRITABLE(firstpte) && (first_pve->pv_flags & PVF_WRITE)) { + first_pve->pv_flags &= ~PVF_WRITE; + prot &= ~VM_PROT_WRITE; + } + + if (!L2_S_EXECUTABLE(firstpte)) + prot &= ~VM_PROT_EXECUTE; + + /* + * Examine each of the other PTEs in the specified l2_bucket. + * Abort if this PTE maps an unexpected 4KB physical page or + * does not have identical characteristics to the first PTE. + */ + pa = l2pte_pa(firstpte) + ((L2_PTE_NUM_TOTAL - 1) * PAGE_SIZE); + old_va = L2_NEXT_BUCKET(first_va) - PAGE_SIZE; + + for (pte = (firstptep + L2_PTE_NUM_TOTAL - 1); pte > firstptep; pte--) { + oldpte = *pte; + if (l2pte_pa(oldpte) != pa) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for " + "va %#x in pmap %p", va, pmap); + return; + } + if ((oldpte & L2_S_PROMOTE) != (firstpte & L2_S_PROMOTE)) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for " + "va %#x in pmap %p", va, pmap); + return; + } + oldm = PHYS_TO_VM_PAGE(l2pte_pa(oldpte)); + if (oldm && ((oldm->oflags & VPO_UNMANAGED) != 0)) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for " + "va %#x in pmap %p", va, pmap); + return; + } + + pve = pmap_find_pv(&oldm->md, pmap, old_va); + if (pve == NULL) { + pmap_section_p_failures++; + CTR2(KTR_PMAP, "pmap_promote_section: failure for " + "va %#x old_va %x - no pve", va, old_va); + return; + } + + if (!L2_S_WRITABLE(oldpte) && (pve->pv_flags & PVF_WRITE)) + pve->pv_flags &= ~PVF_WRITE; + + old_va -= PAGE_SIZE; + pa -= PAGE_SIZE; + } + /* + * Promote the pv entries. + */ + pmap_pv_promote_section(pmap, first_va, l2pte_pa(firstpte)); + /* + * Map the superpage. + */ + pmap_map_section(pmap, first_va, l2pte_pa(firstpte), prot, TRUE); + pmap_section_promotions++; + CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x" + " in pmap %p", first_va, pmap); +} + +/* + * Fills a l2_bucket with mappings to consecutive physical pages. + */ +static void +pmap_fill_l2b(struct l2_bucket *l2b, pt_entry_t newpte) +{ + pt_entry_t *ptep; + int i; + + for (i = 0; i < L2_PTE_NUM_TOTAL; i++) { + ptep = &l2b->l2b_kva[i]; + *ptep = newpte; + PTE_SYNC(ptep); + + newpte += PAGE_SIZE; + } + + l2b->l2b_occupancy = L2_PTE_NUM_TOTAL; +} + +/* + * Tries to demote a 1MB section mapping. If demotion fails, the + * 1MB section mapping is invalidated. + */ +static boolean_t +pmap_demote_section(pmap_t pmap, vm_offset_t va) +{ + struct l2_bucket *l2b; + struct pv_entry *l1pdpve; + struct md_page *pvh; + pd_entry_t *pl1pd, l1pd; + pt_entry_t *firstptep, newpte; + vm_offset_t pa; + vm_page_t m; + + PMAP_ASSERT_LOCKED(pmap); + /* + * According to assumptions described in pmap_promote_section, + * kernel is and always should be mapped using 1MB section mappings. + * What more, managed kernel pages were not to be promoted. + */ + KASSERT(pmap != pmap_kernel() && L1_IDX(va) != L1_IDX(vector_page), + ("pmap_demote_section: forbidden section mapping")); + + va = trunc_1mpage(va); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + l1pd = *pl1pd; + KASSERT((l1pd & L1_TYPE_MASK) == L1_S_PROTO, + ("pmap_demote_section: not section or invalid section")); + + pa = l1pd & L1_S_FRAME; + m = PHYS_TO_VM_PAGE(pa); + KASSERT((m != NULL && (m->oflags & VPO_UNMANAGED) == 0), + ("pmap_demote_section: no vm_page for selected superpage or" + "unmanaged")); + + pvh = pa_to_pvh(pa); + l1pdpve = pmap_find_pv(pvh, pmap, va); + KASSERT(l1pdpve != NULL, ("pmap_demote_section: no pv entry for " + "managed page")); + + l2b = pmap_get_l2_bucket(pmap, va); + if (l2b == NULL) { + KASSERT((l1pdpve->pv_flags & PVF_WIRED) == 0, + ("pmap_demote_section: No l2_bucket for wired mapping")); + /* + * Invalidate the 1MB section mapping and return + * "failure" if the mapping was never accessed or the + * allocation of the new l2_bucket fails. + */ + if (!L1_S_REFERENCED(l1pd) || + (l2b = pmap_alloc_l2_bucket(pmap, va)) == NULL) { + /* Unmap and invalidate superpage. */ + pmap_remove_section(pmap, trunc_1mpage(va)); + CTR2(KTR_PMAP, "pmap_demote_section: failure for " + "va %#x in pmap %p", va, pmap); + return (FALSE); + } + } + + /* + * Now we should have corresponding l2_bucket available. + * Let's process it to recreate 256 PTEs for each base page + * within superpage. + */ + newpte = pa | L1_S_DEMOTE(l1pd); + if (m->md.pv_memattr != VM_MEMATTR_UNCACHEABLE) + newpte |= pte_l2_s_cache_mode; + + /* + * If the l2_bucket is new, initialize it. + */ + if (l2b->l2b_occupancy == 0) + pmap_fill_l2b(l2b, newpte); + else { + firstptep = &l2b->l2b_kva[0]; + KASSERT(l2pte_pa(*firstptep) == (pa), + ("pmap_demote_section: firstpte and newpte map different " + "physical addresses")); + /* + * If the mapping has changed attributes, update the page table + * entries. + */ + if ((*firstptep & L2_S_PROMOTE) != (L1_S_DEMOTE(l1pd))) + pmap_fill_l2b(l2b, newpte); + } + /* Demote PV entry */ + pmap_pv_demote_section(pmap, va, pa); + + /* Now fix-up L1 */ + l1pd = l2b->l2b_phys | L1_C_DOM(pmap->pm_domain) | L1_C_PROTO; + *pl1pd = l1pd; + PTE_SYNC(pl1pd); + + pmap_section_demotions++; + CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x" + " in pmap %p", va, pmap); + return (TRUE); +} + +/*************************************************** * page management routines. ***************************************************/ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. */ static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap) { struct pch newtail; struct pv_chunk *pc; struct l2_bucket *l2b = NULL; pmap_t pmap; + pd_entry_t *pl1pd; pt_entry_t *ptep; pv_entry_t pv; vm_offset_t va; vm_page_t free, m, m_pc; uint32_t inuse; int bit, field, freed, idx; PMAP_ASSERT_LOCKED(locked_pmap); pmap = NULL; free = m_pc = NULL; TAILQ_INIT(&newtail); while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || free == NULL)) { TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { cpu_tlb_flushID(); cpu_cpwait(); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } pmap = pc->pc_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) PMAP_LOCK(pmap); else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { pmap = NULL; TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = ffs(inuse) - 1; idx = field * sizeof(inuse) * NBBY + bit; pv = &pc->pc_pventry[idx]; + va = pv->pv_va; + + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) + continue; if (pv->pv_flags & PVF_WIRED) continue; - va = pv->pv_va; l2b = pmap_get_l2_bucket(pmap, va); KASSERT(l2b != NULL, ("No l2 bucket")); ptep = &l2b->l2b_kva[l2pte_index(va)]; m = PHYS_TO_VM_PAGE(l2pte_pa(*ptep)); KASSERT((vm_offset_t)m >= KERNBASE, ("Trying to access non-existent page " "va %x pte %x", va, *ptep)); *ptep = 0; PTE_SYNC(ptep); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); pc->pc_map[field] |= 1UL << bit; freed++; } } if (freed == 0) { TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); continue; } /* Every freed mapping is for a 4 KB page. */ pmap->pm_stats.resident_count -= freed; PV_STAT(pv_entry_frees += freed); PV_STAT(pv_entry_spare += freed); pv_entry_count -= freed; TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != pc_freemask[field]) { TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); /* * One freed pv entry in locked_pmap is * sufficient. */ if (pmap == locked_pmap) goto out; break; } if (field == _NPCM) { PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); break; } } out: TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); if (pmap != NULL) { cpu_tlb_flushID(); cpu_cpwait(); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } return (m_pc); } /* * free the pv_entry back to the free list */ static void pmap_free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int bit, field, idx; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_ASSERT_LOCKED(pmap); PV_STAT(pv_entry_frees++); PV_STAT(pv_entry_spare++); pv_entry_count--; pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / (sizeof(u_long) * NBBY); bit = idx % (sizeof(u_long) * NBBY); pc->pc_map[field] |= 1ul << bit; for (idx = 0; idx < _NPCM; idx++) if (pc->pc_map[idx] != pc_freemask[idx]) { /* * 98% of the time, pc is already at the head of the * list. If it isn't already, move it to the head. */ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != pc)) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); pmap_free_pv_chunk(pc); } static void pmap_free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; TAILQ_REMOVE(&pv_chunks, pc, pc_lru); PV_STAT(pv_entry_spare -= _NPCPV); PV_STAT(pc_chunk_count--); PV_STAT(pc_chunk_frees++); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); pmap_qremove((vm_offset_t)pc, 1); vm_page_unwire(m, 0); vm_page_free(m); pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); } static pv_entry_t pmap_get_pv_entry(pmap_t pmap, boolean_t try) { static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; struct pv_chunk *pc; pv_entry_t pv; vm_page_t m; int bit, field, idx; rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_ASSERT_LOCKED(pmap); PV_STAT(pv_entry_allocs++); pv_entry_count++; if (pv_entry_count > pv_entry_high_water) if (ratecheck(&lastprint, &printinterval)) printf("%s: Approaching the limit on PV entries.\n", __func__); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffs(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; pv = &pc->pc_pventry[idx]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ for (field = 0; field < _NPCM; field++) if (pc->pc_map[field] != 0) { PV_STAT(pv_entry_spare--); return (pv); /* not full, return */ } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare--); return (pv); } } /* * Access to the ptelist "pv_vafree" is synchronized by the pvh * global lock. If "pv_vafree" is currently non-empty, it will * remain non-empty until pmap_ptelist_alloc() completes. */ if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { if (try) { pv_entry_count--; PV_STAT(pc_chunk_tryfail++); return (NULL); } m = pmap_pv_reclaim(pmap); if (m == NULL) goto retry; } PV_STAT(pc_chunk_count++); PV_STAT(pc_chunk_allocs++); pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); pmap_qenter((vm_offset_t)pc, &m, 1); pc->pc_pmap = pmap; pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ for (field = 1; field < _NPCM; field++) pc->pc_map[field] = pc_freemask[field]; TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(pv_entry_spare += _NPCPV - 1); return (pv); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ #define PMAP_REMOVE_CLEAN_LIST_SIZE 3 void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct l2_bucket *l2b; vm_offset_t next_bucket; + pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep; u_int total; u_int mappings, is_exec, is_refd; int flushall = 0; /* * we lock in the pmap => pv_head direction */ rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); total = 0; while (sva < eva) { /* + * Check for large page. + */ + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(sva)]; + l1pd = *pl1pd; + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + KASSERT((l1pd & L1_S_DOM_MASK) != + L1_S_DOM(PMAP_DOMAIN_KERNEL), ("pmap_remove: " + "Trying to remove kernel section mapping")); + /* + * Are we removing the entire large page? If not, + * demote the mapping and fall through. + */ + if (sva + L1_S_SIZE == L2_NEXT_BUCKET(sva) && + eva >= L2_NEXT_BUCKET(sva)) { + pmap_remove_section(pmap, sva); + sva = L2_NEXT_BUCKET(sva); + continue; + } else if (!pmap_demote_section(pmap, sva)) { + /* The large page mapping was destroyed. */ + sva = L2_NEXT_BUCKET(sva); + continue; + } + } + /* * Do one L2 bucket's worth at a time. */ next_bucket = L2_NEXT_BUCKET(sva); if (next_bucket > eva) next_bucket = eva; l2b = pmap_get_l2_bucket(pmap, sva); if (l2b == NULL) { sva = next_bucket; continue; } ptep = &l2b->l2b_kva[l2pte_index(sva)]; mappings = 0; while (sva < next_bucket) { struct vm_page *m; pt_entry_t pte; vm_paddr_t pa; pte = *ptep; if (pte == 0) { /* * Nothing here, move along */ sva += PAGE_SIZE; ptep++; continue; } pmap->pm_stats.resident_count--; pa = l2pte_pa(pte); is_exec = 0; is_refd = 1; /* * Update flags. In a number of circumstances, * we could cluster a lot of these and do a * number of sequential pages in one go. */ if ((m = PHYS_TO_VM_PAGE(pa)) != NULL) { struct pv_entry *pve; pve = pmap_remove_pv(m, pmap, sva); if (pve) { is_exec = PTE_BEEN_EXECD(pte); is_refd = PTE_BEEN_REFD(pte); pmap_free_pv_entry(pmap, pve); } } if (pmap_is_current(pmap)) { total++; if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) { if (is_exec) cpu_tlb_flushID_SE(sva); else if (is_refd) cpu_tlb_flushD_SE(sva); } else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE) flushall = 1; } *ptep = 0; PTE_SYNC(ptep); sva += PAGE_SIZE; ptep++; mappings++; } pmap_free_l2_bucket(pmap, l2b, mappings); } rw_wunlock(&pvh_global_lock); if (flushall) cpu_tlb_flushID(); PMAP_UNLOCK(pmap); } /* * pmap_zero_page() * * Zero a given physical page by mapping it at a page hook point. * In doing the zero page op, the page we zero is mapped cachable, as with * StrongARM accesses to non-cached pages are non-burst making writing * _any_ bulk data very slow. */ static void pmap_zero_page_gen(vm_page_t m, int off, int size) { vm_paddr_t phys = VM_PAGE_TO_PHYS(m); if (!TAILQ_EMPTY(&m->md.pv_list)) panic("pmap_zero_page: page has mappings"); mtx_lock(&cmtx); /* * Hook in the page, zero it, invalidate the TLB as needed. * * Note the temporary zero-page mapping must be a non-cached page in * order to work without corruption when write-allocate is enabled. */ *cdst_pte = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(cdst_pte, VM_PROT_WRITE, 0); PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); if (off || size != PAGE_SIZE) bzero((void *)(cdstp + off), size); else bzero_page(cdstp); /* * Although aliasing is not possible if we use * cdstp temporary mappings with memory that * will be mapped later as non-cached or with write-through * caches we might end up overwriting it when calling wbinv_all * So make sure caches are clean after copy operation */ cpu_idcache_wbinv_range(cdstp, size); pmap_l2cache_wbinv_range(cdstp, phys, size); mtx_unlock(&cmtx); } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { pmap_zero_page_gen(m, 0, PAGE_SIZE); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { pmap_zero_page_gen(m, off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { pmap_zero_page(m); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ /* * pmap_copy_page() * * Copy one physical page into another, by mapping the pages into * hook points. The same comment regarding cachability as in * pmap_zero_page also applies here. */ void pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst) { /* * Hold the source page's lock for the duration of the copy * so that no other mappings can be created while we have a * potentially aliased mapping. * Map the pages into the page hook points, copy them, and purge * the cache for the appropriate page. Invalidate the TLB * as required. */ mtx_lock(&cmtx); /* For ARMv6 using System bit is deprecated and mapping with AP * bits set to 0x0 makes page not accessible. csrc_pte is mapped * read/write until proper mapping defines are created for ARMv6. */ *csrc_pte = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(csrc_pte, VM_PROT_READ, 0); PTE_SYNC(csrc_pte); *cdst_pte = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0); PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(csrcp); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); /* * Although aliasing is not possible if we use * cdstp temporary mappings with memory that * will be mapped later as non-cached or with write-through * caches we might end up overwriting it when calling wbinv_all * So make sure caches are clean after copy operation */ bcopy_page(csrcp, cdstp); cpu_idcache_wbinv_range(cdstp, PAGE_SIZE); pmap_l2cache_wbinv_range(cdstp, dst, PAGE_SIZE); mtx_unlock(&cmtx); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { vm_page_t a_pg, b_pg; vm_offset_t a_pg_offset, b_pg_offset; int cnt; mtx_lock(&cmtx); while (xfersize > 0) { a_pg = ma[a_offset >> PAGE_SHIFT]; a_pg_offset = a_offset & PAGE_MASK; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); b_pg = mb[b_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; cnt = min(cnt, PAGE_SIZE - b_pg_offset); *csrc_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(csrc_pte, VM_PROT_READ, 0); PTE_SYNC(csrc_pte); *cdst_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) | pte_l2_s_cache_mode | L2_S_REF; pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0); PTE_SYNC(cdst_pte); cpu_tlb_flushD_SE(csrcp); cpu_tlb_flushD_SE(cdstp); cpu_cpwait(); bcopy((char *)csrcp + a_pg_offset, (char *)cdstp + b_pg_offset, cnt); cpu_idcache_wbinv_range(cdstp + b_pg_offset, cnt); pmap_l2cache_wbinv_range(cdstp + b_pg_offset, VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt); xfersize -= cnt; a_offset += cnt; b_offset += cnt; } mtx_unlock(&cmtx); } void pmap_copy_page(vm_page_t src, vm_page_t dst) { if (_arm_memcpy && PAGE_SIZE >= _min_memcpy_size && _arm_memcpy((void *)VM_PAGE_TO_PHYS(dst), (void *)VM_PAGE_TO_PHYS(src), PAGE_SIZE, IS_PHYSICAL) == 0) return; pmap_copy_page_generic(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); } /* * this routine returns true if a physical page resides * in the given pmap. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { + struct md_page *pvh; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } - + if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + if (PV_PMAP(pv) == pmap) { + rv = TRUE; + break; + } + loops++; + if (loops >= 16) + break; + } + } rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { - pv_entry_t pv; int count; count = 0; - if ((m->flags & PG_FICTITIOUS) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0) return (count); rw_wlock(&pvh_global_lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) - if ((pv->pv_flags & PVF_WIRED) != 0) - count++; + count = pmap_pvh_wired_mappings(&m->md, count); + if ((m->flags & PG_FICTITIOUS) == 0) { + count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), + count); + } rw_wunlock(&pvh_global_lock); return (count); } /* - * pmap_is_referenced: + * pmap_pvh_wired_mappings: * - * Return whether or not the specified physical page was referenced - * in any physical maps. + * Return the updated number "count" of managed mappings that are wired. */ -boolean_t -pmap_is_referenced(vm_page_t m) +static int +pmap_pvh_wired_mappings(struct md_page *pvh, int count) { + pv_entry_t pv; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + if ((pv->pv_flags & PVF_WIRED) != 0) + count++; + } + return (count); +} + +/* + * Returns TRUE if any of the given mappings were referenced and FALSE + * otherwise. Both page and section mappings are supported. + */ +static boolean_t +pmap_is_referenced_pvh(struct md_page *pvh) +{ struct l2_bucket *l2b; pv_entry_t pv; + pd_entry_t *pl1pd; pt_entry_t *ptep; pmap_t pmap; boolean_t rv; - KASSERT((m->oflags & VPO_UNMANAGED) == 0, - ("pmap_is_referenced: page %p is not managed", m)); + rw_assert(&pvh_global_lock, RA_WLOCKED); rv = FALSE; - rw_wlock(&pvh_global_lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); - l2b = pmap_get_l2_bucket(pmap, pv->pv_va); - ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; - rv = L2_S_REFERENCED(*ptep); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; + if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) + rv = L1_S_REFERENCED(*pl1pd); + else { + l2b = pmap_get_l2_bucket(pmap, pv->pv_va); + ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; + rv = L2_S_REFERENCED(*ptep); + } PMAP_UNLOCK(pmap); if (rv) break; } + return (rv); +} + +/* + * pmap_is_referenced: + * + * Return whether or not the specified physical page was referenced + * in any physical maps. + */ +boolean_t +pmap_is_referenced(vm_page_t m) +{ + boolean_t rv; + + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("pmap_is_referenced: page %p is not managed", m)); + rw_wlock(&pvh_global_lock); + rv = pmap_is_referenced_pvh(&m->md) || + ((m->flags & PG_FICTITIOUS) == 0 && + pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } /* * pmap_ts_referenced: * * Return the count of reference bits for a page, clearing all of them. */ int pmap_ts_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); return (pmap_clearbit(m, PVF_REF)); } - -boolean_t -pmap_is_modified(vm_page_t m) +/* + * Returns TRUE if any of the given mappings were used to modify + * physical memory. Otherwise, returns FALSE. Both page and 1MB section + * mappings are supported. + */ +static boolean_t +pmap_is_modified_pvh(struct md_page *pvh) { + pd_entry_t *pl1pd; struct l2_bucket *l2b; pv_entry_t pv; pt_entry_t *ptep; pmap_t pmap; boolean_t rv; + rw_assert(&pvh_global_lock, RA_WLOCKED); + rv = FALSE; + + TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(pv->pv_va)]; + if ((*pl1pd & L1_TYPE_MASK) == L1_S_PROTO) + rv = L1_S_WRITABLE(*pl1pd); + else { + l2b = pmap_get_l2_bucket(pmap, pv->pv_va); + ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; + rv = L2_S_WRITABLE(*ptep); + } + PMAP_UNLOCK(pmap); + if (rv) + break; + } + + return (rv); +} + +boolean_t +pmap_is_modified(vm_page_t m) +{ + boolean_t rv; + KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); - rv = FALSE; /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE - * is clear, no PTEs can have PG_M set. + * is clear, no PTEs can have APX cleared. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) - return (rv); + return (FALSE); rw_wlock(&pvh_global_lock); - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); - l2b = pmap_get_l2_bucket(pmap, pv->pv_va); - ptep = &l2b->l2b_kva[l2pte_index(pv->pv_va)]; - rv = (L2_S_WRITABLE(*ptep)); - PMAP_UNLOCK(pmap); - if (rv) - break; - } + rv = pmap_is_modified_pvh(&m->md) || + ((m->flags & PG_FICTITIOUS) == 0 && + pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); rw_wunlock(&pvh_global_lock); return (rv); } - /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no mappings can be modified. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; if (pmap_is_modified(m)) pmap_clearbit(m, PVF_MOD); } /* * pmap_clear_reference: * * Clear the reference bit on the specified physical page. */ void pmap_clear_reference(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_reference: page %p is not managed", m)); if (pmap_is_referenced(m)) pmap_clearbit(m, PVF_REF); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (vm_page_xbusied(m) || (m->aflags & PGA_WRITEABLE) != 0) pmap_clearbit(m, PVF_WRITE); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { struct l2_bucket *l2b; + pd_entry_t *pl1pd, l1pd; pt_entry_t *ptep, pte; vm_paddr_t pa; vm_page_t m; int val; boolean_t managed; PMAP_LOCK(pmap); retry: - l2b = pmap_get_l2_bucket(pmap, addr); - if (l2b == NULL) { - val = 0; - goto out; + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(addr)]; + l1pd = *pl1pd; + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + pa = (l1pd & L1_S_FRAME); + val = MINCORE_SUPER | MINCORE_INCORE; + if (L1_S_WRITABLE(l1pd)) + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + managed = FALSE; + m = PHYS_TO_VM_PAGE(pa); + if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) + managed = TRUE; + if (managed) { + if (L1_S_REFERENCED(l1pd)) + val |= MINCORE_REFERENCED | + MINCORE_REFERENCED_OTHER; + } + } else { + l2b = pmap_get_l2_bucket(pmap, addr); + if (l2b == NULL) { + val = 0; + goto out; + } + ptep = &l2b->l2b_kva[l2pte_index(addr)]; + pte = *ptep; + if (!l2pte_valid(pte)) { + val = 0; + goto out; + } + val = MINCORE_INCORE; + if (L2_S_WRITABLE(pte)) + val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; + managed = FALSE; + pa = l2pte_pa(pte); + m = PHYS_TO_VM_PAGE(pa); + if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) + managed = TRUE; + if (managed) { + if (L2_S_REFERENCED(pte)) + val |= MINCORE_REFERENCED | + MINCORE_REFERENCED_OTHER; + } } - ptep = &l2b->l2b_kva[l2pte_index(addr)]; - pte = *ptep; - if (!l2pte_valid(pte)) { - val = 0; - goto out; - } - val = MINCORE_INCORE; - if (L2_S_WRITABLE(pte)) - val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; - managed = FALSE; - pa = l2pte_pa(pte); - m = PHYS_TO_VM_PAGE(pa); - if (m != NULL && (m->oflags & VPO_UNMANAGED) == 0) - managed = TRUE; - if (managed) { - if (L2_S_REFERENCED(pte)) - val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; - } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else out: PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev(vm_offset_t pa, vm_size_t size) { vm_offset_t va, tmpva, offset; offset = pa & PAGE_MASK; size = roundup(size, PAGE_SIZE); GIANT_REQUIRED; va = kva_alloc(size); if (!va) panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); for (tmpva = va; size > 0;) { pmap_kenter_internal(tmpva, pa, 0); size -= PAGE_SIZE; tmpva += PAGE_SIZE; pa += PAGE_SIZE; } return ((void *)(va + offset)); } /* * pmap_map_section: * * Create a single section mapping. */ void -pmap_map_section(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, - int prot, int cache) +pmap_map_section(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_prot_t prot, + boolean_t ref) { - pd_entry_t *pde = (pd_entry_t *) l1pt; + pd_entry_t *pl1pd, l1pd; pd_entry_t fl; - KASSERT(((va | pa) & L1_S_OFFSET) == 0, ("ouin2")); + KASSERT(((va | pa) & L1_S_OFFSET) == 0, + ("Not a valid section mapping")); - fl = l1_mem_types[cache]; + fl = pte_l1_s_cache_mode; - pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | - L1_S_PROT(PTE_KERNEL, prot) | fl | L1_S_DOM(PMAP_DOMAIN_KERNEL); - PTE_SYNC(&pde[va >> L1_S_SHIFT]); + pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; + l1pd = L1_S_PROTO | pa | L1_S_PROT(PTE_USER, prot) | fl | + L1_S_DOM(pmap->pm_domain); + + /* Mark page referenced if this section is a result of a promotion. */ + if (ref == TRUE) + l1pd |= L1_S_REF; +#ifdef SMP + l1pd |= L1_SHARED; +#endif + *pl1pd = l1pd; + PTE_SYNC(pl1pd); } /* * pmap_link_l2pt: * * Link the L2 page table specified by l2pv.pv_pa into the L1 * page table at the slot for "va". */ void pmap_link_l2pt(vm_offset_t l1pt, vm_offset_t va, struct pv_addr *l2pv) { pd_entry_t *pde = (pd_entry_t *) l1pt, proto; u_int slot = va >> L1_S_SHIFT; proto = L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_C_PROTO; #ifdef VERBOSE_INIT_ARM printf("pmap_link_l2pt: pa=0x%x va=0x%x\n", l2pv->pv_pa, l2pv->pv_va); #endif pde[slot + 0] = proto | (l2pv->pv_pa + 0x000); PTE_SYNC(&pde[slot]); SLIST_INSERT_HEAD(&kernel_pt_list, l2pv, pv_list); } /* * pmap_map_entry * * Create a single page mapping. */ void pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, int cache) { pd_entry_t *pde = (pd_entry_t *) l1pt; pt_entry_t fl; pt_entry_t *ptep; KASSERT(((va | pa) & PAGE_MASK) == 0, ("ouin")); fl = l2s_mem_types[cache]; if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) panic("pmap_map_entry: no L2 table for VA 0x%08x", va); ptep = (pt_entry_t *)kernel_pt_lookup(pde[L1_IDX(va)] & L1_C_ADDR_MASK); if (ptep == NULL) panic("pmap_map_entry: can't find L2 table for VA 0x%08x", va); ptep[l2pte_index(va)] = L2_S_PROTO | pa | fl | L2_S_REF; pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); PTE_SYNC(&ptep[l2pte_index(va)]); } /* * pmap_map_chunk: * * Map a chunk of memory using the most efficient mappings * possible (section. large page, small page) into the * provided L1 and L2 tables at the specified virtual address. */ vm_size_t pmap_map_chunk(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, vm_size_t size, int prot, int type) { pd_entry_t *pde = (pd_entry_t *) l1pt; pt_entry_t *ptep, f1, f2s, f2l; vm_size_t resid; int i; resid = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); if (l1pt == 0) panic("pmap_map_chunk: no L1 table provided"); #ifdef VERBOSE_INIT_ARM printf("pmap_map_chunk: pa=0x%x va=0x%x size=0x%x resid=0x%x " "prot=0x%x type=%d\n", pa, va, size, resid, prot, type); #endif f1 = l1_mem_types[type]; f2l = l2l_mem_types[type]; f2s = l2s_mem_types[type]; size = resid; while (resid > 0) { /* See if we can use a section mapping. */ if (L1_S_MAPPABLE_P(va, pa, resid)) { #ifdef VERBOSE_INIT_ARM printf("S"); #endif pde[va >> L1_S_SHIFT] = L1_S_PROTO | pa | - L1_S_PROT(PTE_KERNEL, prot) | f1 | - L1_S_DOM(PMAP_DOMAIN_KERNEL); + L1_S_PROT(PTE_KERNEL, prot | VM_PROT_EXECUTE) | + f1 | L1_S_DOM(PMAP_DOMAIN_KERNEL) | L1_S_REF; PTE_SYNC(&pde[va >> L1_S_SHIFT]); va += L1_S_SIZE; pa += L1_S_SIZE; resid -= L1_S_SIZE; continue; } /* * Ok, we're going to use an L2 table. Make sure * one is actually in the corresponding L1 slot * for the current VA. */ if ((pde[va >> L1_S_SHIFT] & L1_TYPE_MASK) != L1_TYPE_C) panic("pmap_map_chunk: no L2 table for VA 0x%08x", va); ptep = (pt_entry_t *) kernel_pt_lookup( pde[L1_IDX(va)] & L1_C_ADDR_MASK); if (ptep == NULL) panic("pmap_map_chunk: can't find L2 table for VA" "0x%08x", va); /* See if we can use a L2 large page mapping. */ if (L2_L_MAPPABLE_P(va, pa, resid)) { #ifdef VERBOSE_INIT_ARM printf("L"); #endif for (i = 0; i < 16; i++) { ptep[l2pte_index(va) + i] = L2_L_PROTO | pa | L2_L_PROT(PTE_KERNEL, prot) | f2l; PTE_SYNC(&ptep[l2pte_index(va) + i]); } va += L2_L_SIZE; pa += L2_L_SIZE; resid -= L2_L_SIZE; continue; } /* Use a small page mapping. */ #ifdef VERBOSE_INIT_ARM printf("P"); #endif ptep[l2pte_index(va)] = L2_S_PROTO | pa | f2s | L2_S_REF; pmap_set_prot(&ptep[l2pte_index(va)], prot, 0); PTE_SYNC(&ptep[l2pte_index(va)]); va += PAGE_SIZE; pa += PAGE_SIZE; resid -= PAGE_SIZE; } #ifdef VERBOSE_INIT_ARM printf("\n"); #endif return (size); } /********************** Static device map routines ***************************/ static const struct pmap_devmap *pmap_devmap_table; /* * Register the devmap table. This is provided in case early console * initialization needs to register mappings created by bootstrap code * before pmap_devmap_bootstrap() is called. */ void pmap_devmap_register(const struct pmap_devmap *table) { pmap_devmap_table = table; } /* * Map all of the static regions in the devmap table, and remember * the devmap table so other parts of the kernel can look up entries * later. */ void pmap_devmap_bootstrap(vm_offset_t l1pt, const struct pmap_devmap *table) { int i; pmap_devmap_table = table; for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) { #ifdef VERBOSE_INIT_ARM printf("devmap: %08x -> %08x @ %08x\n", pmap_devmap_table[i].pd_pa, pmap_devmap_table[i].pd_pa + pmap_devmap_table[i].pd_size - 1, pmap_devmap_table[i].pd_va); #endif pmap_map_chunk(l1pt, pmap_devmap_table[i].pd_va, pmap_devmap_table[i].pd_pa, pmap_devmap_table[i].pd_size, pmap_devmap_table[i].pd_prot, pmap_devmap_table[i].pd_cache); } } const struct pmap_devmap * pmap_devmap_find_pa(vm_paddr_t pa, vm_size_t size) { int i; if (pmap_devmap_table == NULL) return (NULL); for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) { if (pa >= pmap_devmap_table[i].pd_pa && pa + size <= pmap_devmap_table[i].pd_pa + pmap_devmap_table[i].pd_size) return (&pmap_devmap_table[i]); } return (NULL); } const struct pmap_devmap * pmap_devmap_find_va(vm_offset_t va, vm_size_t size) { int i; if (pmap_devmap_table == NULL) return (NULL); for (i = 0; pmap_devmap_table[i].pd_size != 0; i++) { if (va >= pmap_devmap_table[i].pd_va && va + size <= pmap_devmap_table[i].pd_va + pmap_devmap_table[i].pd_size) return (&pmap_devmap_table[i]); } return (NULL); } int pmap_dmap_iscurrent(pmap_t pmap) { return(pmap_is_current(pmap)); } void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { /* * Remember the memattr in a field that gets used to set the appropriate * bits in the PTEs as mappings are established. */ m->md.pv_memattr = ma; /* * It appears that this function can only be called before any mappings * for the page are established on ARM. If this ever changes, this code * will need to walk the pv_list and make each of the existing mappings * uncacheable, being careful to sync caches and PTEs (and maybe * invalidate TLB?) for any current mapping it modifies. */ if (TAILQ_FIRST(&m->md.pv_list) != NULL) panic("Can't change memattr on page with existing mappings"); } Index: head/sys/arm/include/param.h =================================================================== --- head/sys/arm/include/param.h (revision 254917) +++ head/sys/arm/include/param.h (revision 254918) @@ -1,147 +1,148 @@ /*- * Copyright (c) 2001 David E. O'Brien * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)param.h 5.8 (Berkeley) 6/28/91 * $FreeBSD$ */ #ifndef _ARM_INCLUDE_PARAM_H_ #define _ARM_INCLUDE_PARAM_H_ /* * Machine dependent constants for StrongARM */ #include #define STACKALIGNBYTES (8 - 1) #define STACKALIGN(p) ((u_int)(p) & ~STACKALIGNBYTES) #define __PCI_REROUTE_INTERRUPT #ifndef MACHINE #define MACHINE "arm" #endif #ifndef MACHINE_ARCH #if defined(__FreeBSD_ARCH_armv6__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 6) #ifdef __ARMEB__ #define MACHINE_ARCH "armv6eb" #else #define MACHINE_ARCH "armv6" #endif #else #ifdef __ARMEB__ #define MACHINE_ARCH "armeb" #else #define MACHINE_ARCH "arm" #endif #endif #endif #define MID_MACHINE MID_ARM6 #if defined(SMP) || defined(KLD_MODULE) #ifndef MAXCPU #define MAXCPU 4 #endif #else #define MAXCPU 1 #endif /* SMP || KLD_MODULE */ #ifndef MAXMEMDOM #define MAXMEMDOM 1 #endif #define ALIGNBYTES _ALIGNBYTES #define ALIGN(p) _ALIGN(p) /* * ALIGNED_POINTER is a boolean macro that checks whether an address * is valid to fetch data elements of type t from on this architecture. * This does not reflect the optimal alignment, just the possibility * (within reasonable limits). */ #define ALIGNED_POINTER(p, t) ((((unsigned)(p)) & (sizeof(t)-1)) == 0) /* * CACHE_LINE_SIZE is the compile-time maximum cache line size for an * architecture. It should be used with appropriate caution. */ #define CACHE_LINE_SHIFT 6 #define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT) #define PAGE_SHIFT 12 #define PAGE_SIZE (1 << PAGE_SHIFT) /* Page size */ #define PAGE_MASK (PAGE_SIZE - 1) #define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) #define PDR_SHIFT 20 /* log2(NBPDR) */ #define NBPDR (1 << PDR_SHIFT) +#define PDRMASK (NBPDR - 1) #define NPDEPG (1 << (32 - PDR_SHIFT)) -#define MAXPAGESIZES 1 /* maximum number of supported page sizes */ +#define MAXPAGESIZES 2 /* maximum number of supported page sizes */ #ifndef KSTACK_PAGES #define KSTACK_PAGES 2 #endif /* !KSTACK_PAGES */ #ifndef FPCONTEXTSIZE #define FPCONTEXTSIZE (0x100) #endif #ifndef KSTACK_GUARD_PAGES #define KSTACK_GUARD_PAGES 1 #endif /* !KSTACK_GUARD_PAGES */ #define USPACE_SVC_STACK_TOP KSTACK_PAGES * PAGE_SIZE #define USPACE_SVC_STACK_BOTTOM (USPACE_SVC_STACK_TOP - 0x1000) #define USPACE_UNDEF_STACK_TOP (USPACE_SVC_STACK_BOTTOM - 0x10) #define USPACE_UNDEF_STACK_BOTTOM (FPCONTEXTSIZE + 10) /* * Mach derived conversion macros */ #define trunc_page(x) ((x) & ~PAGE_MASK) #define round_page(x) (((x) + PAGE_MASK) & ~PAGE_MASK) -#define trunc_4mpage(x) ((unsigned)(x) & ~PDRMASK) -#define round_4mpage(x) ((((unsigned)(x)) + PDRMASK) & ~PDRMASK) +#define trunc_1mpage(x) ((unsigned)(x) & ~PDRMASK) +#define round_1mpage(x) ((((unsigned)(x)) + PDRMASK) & ~PDRMASK) #define atop(x) ((unsigned)(x) >> PAGE_SHIFT) #define ptoa(x) ((unsigned)(x) << PAGE_SHIFT) #define arm32_btop(x) ((unsigned)(x) >> PAGE_SHIFT) #define arm32_ptob(x) ((unsigned)(x) << PAGE_SHIFT) #define pgtok(x) ((x) * (PAGE_SIZE / 1024)) #endif /* !_ARM_INCLUDE_PARAM_H_ */ Index: head/sys/arm/include/pmap.h =================================================================== --- head/sys/arm/include/pmap.h (revision 254917) +++ head/sys/arm/include/pmap.h (revision 254918) @@ -1,712 +1,740 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Derived from hp300 version by Mike Hibler, this version by William * Jolitz uses a recursive map [a pde points to the page directory] to * map the page tables using the pagetables themselves. This is done to * reduce the impact on kernel virtual memory for lots of sparse address * space, and to reduce the cost of memory to each process. * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 * from: FreeBSD: src/sys/i386/include/pmap.h,v 1.70 2000/11/30 * * $FreeBSD$ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ #include #include /* * Pte related macros */ #if ARM_ARCH_6 || ARM_ARCH_7A #ifdef SMP #define PTE_NOCACHE 2 #else #define PTE_NOCACHE 1 #endif #define PTE_CACHE 6 #define PTE_DEVICE 2 #define PTE_PAGETABLE 4 #else #define PTE_NOCACHE 1 #define PTE_CACHE 2 #define PTE_PAGETABLE 3 #endif enum mem_type { STRONG_ORD = 0, DEVICE_NOSHARE, DEVICE_SHARE, NRML_NOCACHE, NRML_IWT_OWT, NRML_IWB_OWB, NRML_IWBA_OWBA }; #ifndef LOCORE #include #include #include #include #define PDESIZE sizeof(pd_entry_t) /* for assembly files */ #define PTESIZE sizeof(pt_entry_t) /* for assembly files */ #ifdef _KERNEL #define vtophys(va) pmap_kextract((vm_offset_t)(va)) #endif #define pmap_page_get_memattr(m) ((m)->md.pv_memattr) -#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) +#if (ARM_MMU_V6 + ARM_MMU_V7) > 0 +boolean_t pmap_page_is_mapped(vm_page_t); +#else +#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list)) +#endif void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); /* * Pmap stuff */ /* * This structure is used to hold a virtual<->physical address * association and is used mostly by bootstrap code */ struct pv_addr { SLIST_ENTRY(pv_addr) pv_list; vm_offset_t pv_va; vm_paddr_t pv_pa; }; struct pv_entry; struct pv_chunk; struct md_page { int pvh_attrs; vm_memattr_t pv_memattr; #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 vm_offset_t pv_kva; /* first kernel VA mapping */ #endif TAILQ_HEAD(,pv_entry) pv_list; }; struct l1_ttable; struct l2_dtable; /* * The number of L2 descriptor tables which can be tracked by an l2_dtable. * A bucket size of 16 provides for 16MB of contiguous virtual address * space per l2_dtable. Most processes will, therefore, require only two or * three of these to map their whole working set. */ #define L2_BUCKET_LOG2 4 #define L2_BUCKET_SIZE (1 << L2_BUCKET_LOG2) /* * Given the above "L2-descriptors-per-l2_dtable" constant, the number * of l2_dtable structures required to track all possible page descriptors * mappable by an L1 translation table is given by the following constants: */ #define L2_LOG2 ((32 - L1_S_SHIFT) - L2_BUCKET_LOG2) #define L2_SIZE (1 << L2_LOG2) struct pmap { struct mtx pm_mtx; u_int8_t pm_domain; struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ #else TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ #endif }; typedef struct pmap *pmap_t; #ifdef _KERNEL extern struct pmap kernel_pmap_store; #define kernel_pmap (&kernel_pmap_store) #define pmap_kernel() kernel_pmap #define PMAP_ASSERT_LOCKED(pmap) \ mtx_assert(&(pmap)->pm_mtx, MA_OWNED) #define PMAP_LOCK(pmap) mtx_lock(&(pmap)->pm_mtx) #define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx) #define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \ NULL, MTX_DEF | MTX_DUPOK) #define PMAP_OWNED(pmap) mtx_owned(&(pmap)->pm_mtx) #define PMAP_MTX(pmap) (&(pmap)->pm_mtx) #define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx) #define PMAP_UNLOCK(pmap) mtx_unlock(&(pmap)->pm_mtx) #endif /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_list; int pv_flags; /* flags (wired, etc...) */ #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 pmap_t pv_pmap; /* pmap where mapping lies */ TAILQ_ENTRY(pv_entry) pv_plist; #endif } *pv_entry_t; /* * pv_entries are allocated in chunks per-process. This avoids the * need to track per-pmap assignments. */ #define _NPCM 8 #define _NPCPV 252 struct pv_chunk { pmap_t pc_pmap; TAILQ_ENTRY(pv_chunk) pc_list; uint32_t pc_map[_NPCM]; /* bitmap; 1 = free */ uint32_t pc_dummy[3]; /* aligns pv_chunk to 4KB */ TAILQ_ENTRY(pv_chunk) pc_lru; struct pv_entry pc_pventry[_NPCPV]; }; #ifdef _KERNEL boolean_t pmap_get_pde_pte(pmap_t, vm_offset_t, pd_entry_t **, pt_entry_t **); /* * virtual address to page table entry and * to physical address. Likewise for alternate address space. * Note: these work recursively, thus vtopte of a pte will give * the corresponding pde that in turn maps it. */ /* * The current top of kernel VM. */ extern vm_offset_t pmap_curmaxkvaddr; struct pcb; void pmap_set_pcb_pagedir(pmap_t, struct pcb *); /* Virtual address to page table entry */ static __inline pt_entry_t * vtopte(vm_offset_t va) { pd_entry_t *pdep; pt_entry_t *ptep; if (pmap_get_pde_pte(pmap_kernel(), va, &pdep, &ptep) == FALSE) return (NULL); return (ptep); } extern vm_paddr_t phys_avail[]; extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; void pmap_bootstrap(vm_offset_t firstaddr, struct pv_addr *l1pt); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void pmap_kenter_nocache(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temp(vm_paddr_t pa, int i); void pmap_kenter_user(vm_offset_t va, vm_paddr_t pa); vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); void *pmap_mapdev(vm_offset_t, vm_size_t); void pmap_unmapdev(vm_offset_t, vm_size_t); vm_page_t pmap_use_pt(pmap_t, vm_offset_t); void pmap_debug(int); +#if (ARM_MMU_V6 + ARM_MMU_V7) == 0 void pmap_map_section(vm_offset_t, vm_offset_t, vm_offset_t, int, int); +#endif void pmap_link_l2pt(vm_offset_t, vm_offset_t, struct pv_addr *); vm_size_t pmap_map_chunk(vm_offset_t, vm_offset_t, vm_offset_t, vm_size_t, int, int); void pmap_map_entry(vm_offset_t l1pt, vm_offset_t va, vm_offset_t pa, int prot, int cache); int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t, int); int pmap_dmap_iscurrent(pmap_t pmap); /* * Definitions for MMU domains */ #define PMAP_DOMAINS 15 /* 15 'user' domains (1-15) */ #define PMAP_DOMAIN_KERNEL 0 /* The kernel uses domain #0 */ /* * The new pmap ensures that page-tables are always mapping Write-Thru. * Thus, on some platforms we can run fast and loose and avoid syncing PTEs * on every change. * * Unfortunately, not all CPUs have a write-through cache mode. So we * define PMAP_NEEDS_PTE_SYNC for C code to conditionally do PTE syncs, * and if there is the chance for PTE syncs to be needed, we define * PMAP_INCLUDE_PTE_SYNC so e.g. assembly code can include (and run) * the code. */ extern int pmap_needs_pte_sync; /* * These macros define the various bit masks in the PTE. * * We use these macros since we use different bits on different processor * models. */ #define L1_S_CACHE_MASK_generic (L1_S_B|L1_S_C) #define L1_S_CACHE_MASK_xscale (L1_S_B|L1_S_C|L1_S_XSCALE_TEX(TEX_XSCALE_X)|\ L1_S_XSCALE_TEX(TEX_XSCALE_T)) #define L2_L_CACHE_MASK_generic (L2_B|L2_C) #define L2_L_CACHE_MASK_xscale (L2_B|L2_C|L2_XSCALE_L_TEX(TEX_XSCALE_X) | \ L2_XSCALE_L_TEX(TEX_XSCALE_T)) #define L2_S_PROT_U_generic (L2_AP(AP_U)) #define L2_S_PROT_W_generic (L2_AP(AP_W)) #define L2_S_PROT_MASK_generic (L2_S_PROT_U|L2_S_PROT_W) #define L2_S_PROT_U_xscale (L2_AP0(AP_U)) #define L2_S_PROT_W_xscale (L2_AP0(AP_W)) #define L2_S_PROT_MASK_xscale (L2_S_PROT_U|L2_S_PROT_W) #define L2_S_CACHE_MASK_generic (L2_B|L2_C) #define L2_S_CACHE_MASK_xscale (L2_B|L2_C|L2_XSCALE_T_TEX(TEX_XSCALE_X)| \ L2_XSCALE_T_TEX(TEX_XSCALE_X)) #define L1_S_PROTO_generic (L1_TYPE_S | L1_S_IMP) #define L1_S_PROTO_xscale (L1_TYPE_S) #define L1_C_PROTO_generic (L1_TYPE_C | L1_C_IMP2) #define L1_C_PROTO_xscale (L1_TYPE_C) #define L2_L_PROTO (L2_TYPE_L) #define L2_S_PROTO_generic (L2_TYPE_S) #define L2_S_PROTO_xscale (L2_TYPE_XSCALE_XS) /* * User-visible names for the ones that vary with MMU class. */ #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 #define L2_AP(x) (L2_AP0(x)) #else #define L2_AP(x) (L2_AP0(x) | L2_AP1(x) | L2_AP2(x) | L2_AP3(x)) #endif #if ARM_NMMUS > 1 /* More than one MMU class configured; use variables. */ #define L2_S_PROT_U pte_l2_s_prot_u #define L2_S_PROT_W pte_l2_s_prot_w #define L2_S_PROT_MASK pte_l2_s_prot_mask #define L1_S_CACHE_MASK pte_l1_s_cache_mask #define L2_L_CACHE_MASK pte_l2_l_cache_mask #define L2_S_CACHE_MASK pte_l2_s_cache_mask #define L1_S_PROTO pte_l1_s_proto #define L1_C_PROTO pte_l1_c_proto #define L2_S_PROTO pte_l2_s_proto #elif (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 #define L2_S_PROT_U L2_S_PROT_U_generic #define L2_S_PROT_W L2_S_PROT_W_generic #define L2_S_PROT_MASK L2_S_PROT_MASK_generic #define L1_S_CACHE_MASK L1_S_CACHE_MASK_generic #define L2_L_CACHE_MASK L2_L_CACHE_MASK_generic #define L2_S_CACHE_MASK L2_S_CACHE_MASK_generic #define L1_S_PROTO L1_S_PROTO_generic #define L1_C_PROTO L1_C_PROTO_generic #define L2_S_PROTO L2_S_PROTO_generic #elif ARM_MMU_XSCALE == 1 #define L2_S_PROT_U L2_S_PROT_U_xscale #define L2_S_PROT_W L2_S_PROT_W_xscale #define L2_S_PROT_MASK L2_S_PROT_MASK_xscale #define L1_S_CACHE_MASK L1_S_CACHE_MASK_xscale #define L2_L_CACHE_MASK L2_L_CACHE_MASK_xscale #define L2_S_CACHE_MASK L2_S_CACHE_MASK_xscale #define L1_S_PROTO L1_S_PROTO_xscale #define L1_C_PROTO L1_C_PROTO_xscale #define L2_S_PROTO L2_S_PROTO_xscale #elif (ARM_MMU_V6 + ARM_MMU_V7) != 0 /* * AP[2:1] access permissions model: * * AP[2](APX) - Write Disable * AP[1] - User Enable * AP[0] - Reference Flag * * AP[2] AP[1] Kernel User * 0 0 R/W N * 0 1 R/W R/W * 1 0 R N * 1 1 R R * */ #define L2_S_PROT_R (0) /* kernel read */ #define L2_S_PROT_U (L2_AP0(2)) /* user read */ #define L2_S_REF (L2_AP0(1)) /* reference flag */ #define L2_S_PROT_MASK (L2_S_PROT_U|L2_S_PROT_R|L2_APX) #define L2_S_EXECUTABLE(pte) (!(pte & L2_XN)) #define L2_S_WRITABLE(pte) (!(pte & L2_APX)) #define L2_S_REFERENCED(pte) (!!(pte & L2_S_REF)) #ifndef SMP #define L1_S_CACHE_MASK (L1_S_TEX_MASK|L1_S_B|L1_S_C) #define L2_L_CACHE_MASK (L2_L_TEX_MASK|L2_B|L2_C) #define L2_S_CACHE_MASK (L2_S_TEX_MASK|L2_B|L2_C) #else #define L1_S_CACHE_MASK (L1_S_TEX_MASK|L1_S_B|L1_S_C|L1_SHARED) #define L2_L_CACHE_MASK (L2_L_TEX_MASK|L2_B|L2_C|L2_SHARED) #define L2_S_CACHE_MASK (L2_S_TEX_MASK|L2_B|L2_C|L2_SHARED) #endif /* SMP */ #define L1_S_PROTO (L1_TYPE_S) #define L1_C_PROTO (L1_TYPE_C) #define L2_S_PROTO (L2_TYPE_S) +/* + * Promotion to a 1MB (SECTION) mapping requires that the corresponding + * 4KB (SMALL) page mappings have identical settings for the following fields: + */ +#define L2_S_PROMOTE (L2_S_REF | L2_SHARED | L2_S_PROT_MASK | \ + L2_XN | L2_S_PROTO) + +/* + * In order to compare 1MB (SECTION) entry settings with the 4KB (SMALL) + * page mapping it is necessary to read and shift appropriate bits from + * L1 entry to positions of the corresponding bits in the L2 entry. + */ +#define L1_S_DEMOTE(l1pd) ((((l1pd) & L1_S_PROTO) >> 0) | \ + (((l1pd) & L1_SHARED) >> 6) | \ + (((l1pd) & L1_S_REF) >> 6) | \ + (((l1pd) & L1_S_PROT_MASK) >> 6) | \ + (((l1pd) & L1_S_XN) >> 4)) + #ifndef SMP #define ARM_L1S_STRONG_ORD (0) #define ARM_L1S_DEVICE_NOSHARE (L1_S_TEX(2)) #define ARM_L1S_DEVICE_SHARE (L1_S_B) #define ARM_L1S_NRML_NOCACHE (L1_S_TEX(1)) #define ARM_L1S_NRML_IWT_OWT (L1_S_C) #define ARM_L1S_NRML_IWB_OWB (L1_S_C|L1_S_B) #define ARM_L1S_NRML_IWBA_OWBA (L1_S_TEX(1)|L1_S_C|L1_S_B) #define ARM_L2L_STRONG_ORD (0) #define ARM_L2L_DEVICE_NOSHARE (L2_L_TEX(2)) #define ARM_L2L_DEVICE_SHARE (L2_B) #define ARM_L2L_NRML_NOCACHE (L2_L_TEX(1)) #define ARM_L2L_NRML_IWT_OWT (L2_C) #define ARM_L2L_NRML_IWB_OWB (L2_C|L2_B) #define ARM_L2L_NRML_IWBA_OWBA (L2_L_TEX(1)|L2_C|L2_B) #define ARM_L2S_STRONG_ORD (0) #define ARM_L2S_DEVICE_NOSHARE (L2_S_TEX(2)) #define ARM_L2S_DEVICE_SHARE (L2_B) #define ARM_L2S_NRML_NOCACHE (L2_S_TEX(1)) #define ARM_L2S_NRML_IWT_OWT (L2_C) #define ARM_L2S_NRML_IWB_OWB (L2_C|L2_B) #define ARM_L2S_NRML_IWBA_OWBA (L2_S_TEX(1)|L2_C|L2_B) #else #define ARM_L1S_STRONG_ORD (0) #define ARM_L1S_DEVICE_NOSHARE (L1_S_TEX(2)) #define ARM_L1S_DEVICE_SHARE (L1_S_B) #define ARM_L1S_NRML_NOCACHE (L1_S_TEX(1)|L1_SHARED) #define ARM_L1S_NRML_IWT_OWT (L1_S_C|L1_SHARED) #define ARM_L1S_NRML_IWB_OWB (L1_S_C|L1_S_B|L1_SHARED) #define ARM_L1S_NRML_IWBA_OWBA (L1_S_TEX(1)|L1_S_C|L1_S_B|L1_SHARED) #define ARM_L2L_STRONG_ORD (0) #define ARM_L2L_DEVICE_NOSHARE (L2_L_TEX(2)) #define ARM_L2L_DEVICE_SHARE (L2_B) #define ARM_L2L_NRML_NOCACHE (L2_L_TEX(1)|L2_SHARED) #define ARM_L2L_NRML_IWT_OWT (L2_C|L2_SHARED) #define ARM_L2L_NRML_IWB_OWB (L2_C|L2_B|L2_SHARED) #define ARM_L2L_NRML_IWBA_OWBA (L2_L_TEX(1)|L2_C|L2_B|L2_SHARED) #define ARM_L2S_STRONG_ORD (0) #define ARM_L2S_DEVICE_NOSHARE (L2_S_TEX(2)) #define ARM_L2S_DEVICE_SHARE (L2_B) #define ARM_L2S_NRML_NOCACHE (L2_S_TEX(1)|L2_SHARED) #define ARM_L2S_NRML_IWT_OWT (L2_C|L2_SHARED) #define ARM_L2S_NRML_IWB_OWB (L2_C|L2_B|L2_SHARED) #define ARM_L2S_NRML_IWBA_OWBA (L2_S_TEX(1)|L2_C|L2_B|L2_SHARED) #endif /* SMP */ #endif /* ARM_NMMUS > 1 */ #if (ARM_MMU_SA1 == 1) && (ARM_NMMUS == 1) #define PMAP_NEEDS_PTE_SYNC 1 #define PMAP_INCLUDE_PTE_SYNC #elif defined(CPU_XSCALE_81342) #define PMAP_NEEDS_PTE_SYNC 1 #define PMAP_INCLUDE_PTE_SYNC #elif (ARM_MMU_SA1 == 0) #define PMAP_NEEDS_PTE_SYNC 0 #endif /* * These macros return various bits based on kernel/user and protection. * Note that the compiler will usually fold these at compile time. */ #if (ARM_MMU_V6 + ARM_MMU_V7) == 0 #define L1_S_PROT_U (L1_S_AP(AP_U)) #define L1_S_PROT_W (L1_S_AP(AP_W)) #define L1_S_PROT_MASK (L1_S_PROT_U|L1_S_PROT_W) #define L1_S_WRITABLE(pd) ((pd) & L1_S_PROT_W) #define L1_S_PROT(ku, pr) ((((ku) == PTE_USER) ? L1_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L1_S_PROT_W : 0)) #define L2_L_PROT_U (L2_AP(AP_U)) #define L2_L_PROT_W (L2_AP(AP_W)) #define L2_L_PROT_MASK (L2_L_PROT_U|L2_L_PROT_W) #define L2_L_PROT(ku, pr) ((((ku) == PTE_USER) ? L2_L_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_L_PROT_W : 0)) #define L2_S_PROT(ku, pr) ((((ku) == PTE_USER) ? L2_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_S_PROT_W : 0)) #else #define L1_S_PROT_U (L1_S_AP(AP_U)) -#define L1_S_PROT_MASK (L1_S_APX|L1_S_AP(0x3)) -#define L1_S_WRITABLE(pd) (!((pd) & L1_S_APX)) +#define L1_S_PROT_W (L1_S_APX) /* Write disable */ +#define L1_S_PROT_MASK (L1_S_PROT_W|L1_S_PROT_U) +#define L1_S_REF (L1_S_AP(AP_REF)) /* Reference flag */ +#define L1_S_WRITABLE(pd) (!((pd) & L1_S_PROT_W)) +#define L1_S_REFERENCED(pd) ((pd) & L1_S_REF) -#define L1_S_PROT(ku, pr) (L1_S_PROT_MASK & ~((((ku) == PTE_KERNEL) ? L1_S_PROT_U : 0) | \ - (((pr) & VM_PROT_WRITE) ? L1_S_APX : 0))) +#define L1_S_PROT(ku, pr) (((((ku) == PTE_KERNEL) ? 0 : L1_S_PROT_U) | \ + (((pr) & VM_PROT_WRITE) ? 0 : L1_S_PROT_W) | \ + (((pr) & VM_PROT_EXECUTE) ? 0 : L1_S_XN))) #define L2_L_PROT_MASK (L2_APX|L2_AP0(0x3)) #define L2_L_PROT(ku, pr) (L2_L_PROT_MASK & ~((((ku) == PTE_KERNEL) ? L2_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_APX : 0))) #define L2_S_PROT(ku, pr) (L2_S_PROT_MASK & ~((((ku) == PTE_KERNEL) ? L2_S_PROT_U : 0) | \ (((pr) & VM_PROT_WRITE) ? L2_APX : 0))) #endif /* * Macros to test if a mapping is mappable with an L1 Section mapping * or an L2 Large Page mapping. */ #define L1_S_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L1_S_OFFSET) == 0 && (size) >= L1_S_SIZE) #define L2_L_MAPPABLE_P(va, pa, size) \ ((((va) | (pa)) & L2_L_OFFSET) == 0 && (size) >= L2_L_SIZE) /* * Provide a fallback in case we were not able to determine it at * compile-time. */ #ifndef PMAP_NEEDS_PTE_SYNC #define PMAP_NEEDS_PTE_SYNC pmap_needs_pte_sync #define PMAP_INCLUDE_PTE_SYNC #endif #define PTE_SYNC(pte) \ do { \ if (PMAP_NEEDS_PTE_SYNC) { \ cpu_dcache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\ cpu_l2cache_wb_range((vm_offset_t)(pte), sizeof(pt_entry_t));\ } else \ cpu_drain_writebuf(); \ } while (/*CONSTCOND*/0) #define PTE_SYNC_RANGE(pte, cnt) \ do { \ if (PMAP_NEEDS_PTE_SYNC) { \ cpu_dcache_wb_range((vm_offset_t)(pte), \ (cnt) << 2); /* * sizeof(pt_entry_t) */ \ cpu_l2cache_wb_range((vm_offset_t)(pte), \ (cnt) << 2); /* * sizeof(pt_entry_t) */ \ } else \ cpu_drain_writebuf(); \ } while (/*CONSTCOND*/0) extern pt_entry_t pte_l1_s_cache_mode; extern pt_entry_t pte_l1_s_cache_mask; extern pt_entry_t pte_l2_l_cache_mode; extern pt_entry_t pte_l2_l_cache_mask; extern pt_entry_t pte_l2_s_cache_mode; extern pt_entry_t pte_l2_s_cache_mask; extern pt_entry_t pte_l1_s_cache_mode_pt; extern pt_entry_t pte_l2_l_cache_mode_pt; extern pt_entry_t pte_l2_s_cache_mode_pt; extern pt_entry_t pte_l2_s_prot_u; extern pt_entry_t pte_l2_s_prot_w; extern pt_entry_t pte_l2_s_prot_mask; extern pt_entry_t pte_l1_s_proto; extern pt_entry_t pte_l1_c_proto; extern pt_entry_t pte_l2_s_proto; extern void (*pmap_copy_page_func)(vm_paddr_t, vm_paddr_t); extern void (*pmap_copy_page_offs_func)(vm_paddr_t a_phys, vm_offset_t a_offs, vm_paddr_t b_phys, vm_offset_t b_offs, int cnt); extern void (*pmap_zero_page_func)(vm_paddr_t, int, int); #if (ARM_MMU_GENERIC + ARM_MMU_V6 + ARM_MMU_V7 + ARM_MMU_SA1) != 0 || defined(CPU_XSCALE_81342) void pmap_copy_page_generic(vm_paddr_t, vm_paddr_t); void pmap_zero_page_generic(vm_paddr_t, int, int); void pmap_pte_init_generic(void); #if defined(CPU_ARM8) void pmap_pte_init_arm8(void); #endif #if defined(CPU_ARM9) void pmap_pte_init_arm9(void); #endif /* CPU_ARM9 */ #if defined(CPU_ARM10) void pmap_pte_init_arm10(void); #endif /* CPU_ARM10 */ #if (ARM_MMU_V6 + ARM_MMU_V7) != 0 void pmap_pte_init_mmu_v6(void); #endif /* (ARM_MMU_V6 + ARM_MMU_V7) != 0 */ #endif /* (ARM_MMU_GENERIC + ARM_MMU_SA1) != 0 */ #if /* ARM_MMU_SA1 == */1 void pmap_pte_init_sa1(void); #endif /* ARM_MMU_SA1 == 1 */ #if ARM_MMU_XSCALE == 1 void pmap_copy_page_xscale(vm_paddr_t, vm_paddr_t); void pmap_zero_page_xscale(vm_paddr_t, int, int); void pmap_pte_init_xscale(void); void xscale_setup_minidata(vm_offset_t, vm_offset_t, vm_offset_t); void pmap_use_minicache(vm_offset_t, vm_size_t); #endif /* ARM_MMU_XSCALE == 1 */ #if defined(CPU_XSCALE_81342) #define ARM_HAVE_SUPERSECTIONS #endif #define PTE_KERNEL 0 #define PTE_USER 1 #define l1pte_valid(pde) ((pde) != 0) #define l1pte_section_p(pde) (((pde) & L1_TYPE_MASK) == L1_TYPE_S) #define l1pte_page_p(pde) (((pde) & L1_TYPE_MASK) == L1_TYPE_C) #define l1pte_fpage_p(pde) (((pde) & L1_TYPE_MASK) == L1_TYPE_F) #define l2pte_index(v) (((v) & L2_ADDR_BITS) >> L2_S_SHIFT) #define l2pte_valid(pte) ((pte) != 0) #define l2pte_pa(pte) ((pte) & L2_S_FRAME) #define l2pte_minidata(pte) (((pte) & \ (L2_B | L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X)))\ == (L2_C | L2_XSCALE_T_TEX(TEX_XSCALE_X))) /* L1 and L2 page table macros */ #define pmap_pde_v(pde) l1pte_valid(*(pde)) #define pmap_pde_section(pde) l1pte_section_p(*(pde)) #define pmap_pde_page(pde) l1pte_page_p(*(pde)) #define pmap_pde_fpage(pde) l1pte_fpage_p(*(pde)) #define pmap_pte_v(pte) l2pte_valid(*(pte)) #define pmap_pte_pa(pte) l2pte_pa(*(pte)) /* * Flags that indicate attributes of pages or mappings of pages. * * The PVF_MOD and PVF_REF flags are stored in the mdpage for each * page. PVF_WIRED, PVF_WRITE, and PVF_NC are kept in individual * pv_entry's for each page. They live in the same "namespace" so * that we can clear multiple attributes at a time. * * Note the "non-cacheable" flag generally means the page has * multiple mappings in a given address space. */ #define PVF_MOD 0x01 /* page is modified */ #define PVF_REF 0x02 /* page is referenced */ #define PVF_WIRED 0x04 /* mapping is wired */ #define PVF_WRITE 0x08 /* mapping is writable */ #define PVF_EXEC 0x10 /* mapping is executable */ #define PVF_NC 0x20 /* mapping is non-cacheable */ #define PVF_MWC 0x40 /* mapping is used multiple times in userland */ #define PVF_UNMAN 0x80 /* mapping is unmanaged */ void vector_page_setprot(int); /* * This structure is used by machine-dependent code to describe * static mappings of devices, created at bootstrap time. */ struct pmap_devmap { vm_offset_t pd_va; /* virtual address */ vm_paddr_t pd_pa; /* physical address */ vm_size_t pd_size; /* size of region */ vm_prot_t pd_prot; /* protection code */ int pd_cache; /* cache attributes */ }; const struct pmap_devmap *pmap_devmap_find_pa(vm_paddr_t, vm_size_t); const struct pmap_devmap *pmap_devmap_find_va(vm_offset_t, vm_size_t); void pmap_devmap_bootstrap(vm_offset_t, const struct pmap_devmap *); void pmap_devmap_register(const struct pmap_devmap *); #define SECTION_CACHE 0x1 #define SECTION_PT 0x2 void pmap_kenter_section(vm_offset_t, vm_paddr_t, int flags); #ifdef ARM_HAVE_SUPERSECTIONS void pmap_kenter_supersection(vm_offset_t, uint64_t, int flags); #endif extern char *_tmppt; void pmap_postinit(void); #ifdef ARM_USE_SMALL_ALLOC void arm_add_smallalloc_pages(void *, void *, int, int); vm_offset_t arm_ptovirt(vm_paddr_t); void arm_init_smallalloc(void); struct arm_small_page { void *addr; TAILQ_ENTRY(arm_small_page) pg_list; }; #endif #define ARM_NOCACHE_KVA_SIZE 0x1000000 extern vm_offset_t arm_nocache_startaddr; void *arm_remap_nocache(void *, vm_size_t); void arm_unmap_nocache(void *, vm_size_t); extern vm_paddr_t dump_avail[]; #endif /* _KERNEL */ #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ Index: head/sys/arm/include/pte.h =================================================================== --- head/sys/arm/include/pte.h (revision 254917) +++ head/sys/arm/include/pte.h (revision 254918) @@ -1,351 +1,356 @@ /* $NetBSD: pte.h,v 1.1 2001/11/23 17:39:04 thorpej Exp $ */ /*- * Copyright (c) 1994 Mark Brinicombe. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the RiscBSD team. * 4. The name "RiscBSD" nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY RISCBSD ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL RISCBSD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ #ifndef LOCORE typedef uint32_t pd_entry_t; /* page directory entry */ typedef uint32_t pt_entry_t; /* page table entry */ #endif #define PG_FRAME 0xfffff000 /* The PT_SIZE definition is misleading... A page table is only 0x400 * bytes long. But since VM mapping can only be done to 0x1000 a single * 1KB blocks cannot be steered to a va by itself. Therefore the * pages tables are allocated in blocks of 4. i.e. if a 1 KB block * was allocated for a PT then the other 3KB would also get mapped * whenever the 1KB was mapped. */ #define PT_RSIZE 0x0400 /* Real page table size */ #define PT_SIZE 0x1000 #define PD_SIZE 0x4000 /* Page table types and masks */ #define L1_PAGE 0x01 /* L1 page table mapping */ #define L1_SECTION 0x02 /* L1 section mapping */ #define L1_FPAGE 0x03 /* L1 fine page mapping */ #define L1_MASK 0x03 /* Mask for L1 entry type */ #define L2_LPAGE 0x01 /* L2 large page (64KB) */ #define L2_SPAGE 0x02 /* L2 small page (4KB) */ #define L2_MASK 0x03 /* Mask for L2 entry type */ #define L2_INVAL 0x00 /* L2 invalid type */ /* L1 and L2 address masks */ #define L1_ADDR_MASK 0xfffffc00 #define L2_ADDR_MASK 0xfffff000 /* * The ARM MMU architecture was introduced with ARM v3 (previous ARM * architecture versions used an optional off-CPU memory controller * to perform address translation). * * The ARM MMU consists of a TLB and translation table walking logic. * There is typically one TLB per memory interface (or, put another * way, one TLB per software-visible cache). * * The ARM MMU is capable of mapping memory in the following chunks: * * 1M Sections (L1 table) * * 64K Large Pages (L2 table) * * 4K Small Pages (L2 table) * * 1K Tiny Pages (L2 table) * * There are two types of L2 tables: Coarse Tables and Fine Tables. * Coarse Tables can map Large and Small Pages. Fine Tables can * map Tiny Pages. * * Coarse Tables can define 4 Subpages within Large and Small pages. * Subpages define different permissions for each Subpage within * a Page. * * Coarse Tables are 1K in length. Fine tables are 4K in length. * * The Translation Table Base register holds the pointer to the * L1 Table. The L1 Table is a 16K contiguous chunk of memory * aligned to a 16K boundary. Each entry in the L1 Table maps * 1M of virtual address space, either via a Section mapping or * via an L2 Table. * * In addition, the Fast Context Switching Extension (FCSE) is available * on some ARM v4 and ARM v5 processors. FCSE is a way of eliminating * TLB/cache flushes on context switch by use of a smaller address space * and a "process ID" that modifies the virtual address before being * presented to the translation logic. */ /* ARMv6 super-sections. */ #define L1_SUP_SIZE 0x01000000 /* 16M */ #define L1_SUP_OFFSET (L1_SUP_SIZE - 1) #define L1_SUP_FRAME (~L1_SUP_OFFSET) #define L1_SUP_SHIFT 24 #define L1_S_SIZE 0x00100000 /* 1M */ #define L1_S_OFFSET (L1_S_SIZE - 1) #define L1_S_FRAME (~L1_S_OFFSET) #define L1_S_SHIFT 20 #define L2_L_SIZE 0x00010000 /* 64K */ #define L2_L_OFFSET (L2_L_SIZE - 1) #define L2_L_FRAME (~L2_L_OFFSET) #define L2_L_SHIFT 16 #define L2_S_SIZE 0x00001000 /* 4K */ #define L2_S_OFFSET (L2_S_SIZE - 1) #define L2_S_FRAME (~L2_S_OFFSET) #define L2_S_SHIFT 12 #define L2_T_SIZE 0x00000400 /* 1K */ #define L2_T_OFFSET (L2_T_SIZE - 1) #define L2_T_FRAME (~L2_T_OFFSET) #define L2_T_SHIFT 10 /* * The NetBSD VM implementation only works on whole pages (4K), * whereas the ARM MMU's Coarse tables are sized in terms of 1K * (16K L1 table, 1K L2 table). * * So, we allocate L2 tables 4 at a time, thus yielding a 4K L2 * table. */ #define L1_ADDR_BITS 0xfff00000 /* L1 PTE address bits */ #define L2_ADDR_BITS 0x000ff000 /* L2 PTE address bits */ #define L1_TABLE_SIZE 0x4000 /* 16K */ #define L2_TABLE_SIZE 0x1000 /* 4K */ /* * The new pmap deals with the 1KB coarse L2 tables by * allocating them from a pool. Until every port has been converted, * keep the old L2_TABLE_SIZE define lying around. Converted ports * should use L2_TABLE_SIZE_REAL until then. */ #define L2_TABLE_SIZE_REAL 0x400 /* 1K */ +/* Total number of page table entries in L2 table */ +#define L2_PTE_NUM_TOTAL (L2_TABLE_SIZE_REAL / sizeof(pt_entry_t)) + /* * ARM L1 Descriptors */ #define L1_TYPE_INV 0x00 /* Invalid (fault) */ #define L1_TYPE_C 0x01 /* Coarse L2 */ #define L1_TYPE_S 0x02 /* Section */ #define L1_TYPE_F 0x03 /* Fine L2 */ #define L1_TYPE_MASK 0x03 /* mask of type bits */ /* L1 Section Descriptor */ #define L1_S_B 0x00000004 /* bufferable Section */ #define L1_S_C 0x00000008 /* cacheable Section */ #define L1_S_IMP 0x00000010 /* implementation defined */ +#define L1_S_XN (1 << 4) /* execute not */ #define L1_S_DOM(x) ((x) << 5) /* domain */ #define L1_S_DOM_MASK L1_S_DOM(0xf) #define L1_S_AP(x) ((x) << 10) /* access permissions */ #define L1_S_ADDR_MASK 0xfff00000 /* phys address of section */ #define L1_S_TEX(x) (((x) & 0x7) << 12) /* Type Extension */ #define L1_S_TEX_MASK (0x7 << 12) /* Type Extension */ #define L1_S_APX (1 << 15) #define L1_SHARED (1 << 16) #define L1_S_XSCALE_P 0x00000200 /* ECC enable for this section */ #define L1_S_XSCALE_TEX(x) ((x) << 12) /* Type Extension */ #define L1_S_SUPERSEC ((1) << 18) /* Section is a super-section. */ /* L1 Coarse Descriptor */ #define L1_C_IMP0 0x00000004 /* implementation defined */ #define L1_C_IMP1 0x00000008 /* implementation defined */ #define L1_C_IMP2 0x00000010 /* implementation defined */ #define L1_C_DOM(x) ((x) << 5) /* domain */ #define L1_C_DOM_MASK L1_C_DOM(0xf) #define L1_C_ADDR_MASK 0xfffffc00 /* phys address of L2 Table */ #define L1_C_XSCALE_P 0x00000200 /* ECC enable for this section */ /* L1 Fine Descriptor */ #define L1_F_IMP0 0x00000004 /* implementation defined */ #define L1_F_IMP1 0x00000008 /* implementation defined */ #define L1_F_IMP2 0x00000010 /* implementation defined */ #define L1_F_DOM(x) ((x) << 5) /* domain */ #define L1_F_DOM_MASK L1_F_DOM(0xf) #define L1_F_ADDR_MASK 0xfffff000 /* phys address of L2 Table */ #define L1_F_XSCALE_P 0x00000200 /* ECC enable for this section */ /* * ARM L2 Descriptors */ #define L2_TYPE_INV 0x00 /* Invalid (fault) */ #define L2_TYPE_L 0x01 /* Large Page */ #define L2_TYPE_S 0x02 /* Small Page */ #define L2_TYPE_T 0x03 /* Tiny Page */ #define L2_TYPE_MASK 0x03 /* mask of type bits */ /* * This L2 Descriptor type is available on XScale processors * when using a Coarse L1 Descriptor. The Extended Small * Descriptor has the same format as the XScale Tiny Descriptor, * but describes a 4K page, rather than a 1K page. */ #define L2_TYPE_XSCALE_XS 0x03 /* XScale Extended Small Page */ #define L2_B 0x00000004 /* Bufferable page */ #define L2_C 0x00000008 /* Cacheable page */ #define L2_AP0(x) ((x) << 4) /* access permissions (sp 0) */ #define L2_AP1(x) ((x) << 6) /* access permissions (sp 1) */ #define L2_AP2(x) ((x) << 8) /* access permissions (sp 2) */ #define L2_AP3(x) ((x) << 10) /* access permissions (sp 3) */ #define L2_SHARED (1 << 10) #define L2_APX (1 << 9) #define L2_XN (1 << 0) #define L2_L_TEX_MASK (0x7 << 12) /* Type Extension */ #define L2_L_TEX(x) (((x) & 0x7) << 12) #define L2_S_TEX_MASK (0x7 << 6) /* Type Extension */ #define L2_S_TEX(x) (((x) & 0x7) << 6) #define L2_XSCALE_L_TEX(x) ((x) << 12) /* Type Extension */ #define L2_XSCALE_L_S(x) (1 << 15) /* Shared */ #define L2_XSCALE_T_TEX(x) ((x) << 6) /* Type Extension */ /* * Access Permissions for L1 and L2 Descriptors. */ #define AP_W 0x01 /* writable */ +#define AP_REF 0x01 /* referenced flag */ #define AP_U 0x02 /* user */ /* * Short-hand for common AP_* constants. * * Note: These values assume the S (System) bit is set and * the R (ROM) bit is clear in CP15 register 1. */ #define AP_KR 0x00 /* kernel read */ #define AP_KRW 0x01 /* kernel read/write */ #define AP_KRWUR 0x02 /* kernel read/write usr read */ #define AP_KRWURW 0x03 /* kernel read/write usr read/write */ /* * Domain Types for the Domain Access Control Register. */ #define DOMAIN_FAULT 0x00 /* no access */ #define DOMAIN_CLIENT 0x01 /* client */ #define DOMAIN_RESERVED 0x02 /* reserved */ #define DOMAIN_MANAGER 0x03 /* manager */ /* * Type Extension bits for XScale processors. * * Behavior of C and B when X == 0: * * C B Cacheable Bufferable Write Policy Line Allocate Policy * 0 0 N N - - * 0 1 N Y - - * 1 0 Y Y Write-through Read Allocate * 1 1 Y Y Write-back Read Allocate * * Behavior of C and B when X == 1: * C B Cacheable Bufferable Write Policy Line Allocate Policy * 0 0 - - - - DO NOT USE * 0 1 N Y - - * 1 0 Mini-Data - - - * 1 1 Y Y Write-back R/W Allocate */ #define TEX_XSCALE_X 0x01 /* X modifies C and B */ #define TEX_XSCALE_E 0x02 #define TEX_XSCALE_T 0x04 /* Xscale core 3 */ /* * * Cache attributes with L2 present, S = 0 * T E X C B L1 i-cache L1 d-cache L1 DC WP L2 cacheable write coalesce * 0 0 0 0 0 N N - N N * 0 0 0 0 1 N N - N Y * 0 0 0 1 0 Y Y WT N Y * 0 0 0 1 1 Y Y WB Y Y * 0 0 1 0 0 N N - Y Y * 0 0 1 0 1 N N - N N * 0 0 1 1 0 Y Y - - N * 0 0 1 1 1 Y Y WT Y Y * 0 1 0 0 0 N N - N N * 0 1 0 0 1 N/A N/A N/A N/A N/A * 0 1 0 1 0 N/A N/A N/A N/A N/A * 0 1 0 1 1 N/A N/A N/A N/A N/A * 0 1 1 X X N/A N/A N/A N/A N/A * 1 X 0 0 0 N N - N Y * 1 X 0 0 1 Y N WB N Y * 1 X 0 1 0 Y N WT N Y * 1 X 0 1 1 Y N WB Y Y * 1 X 1 0 0 N N - Y Y * 1 X 1 0 1 Y Y WB Y Y * 1 X 1 1 0 Y Y WT Y Y * 1 X 1 1 1 Y Y WB Y Y * * * * * Cache attributes with L2 present, S = 1 * T E X C B L1 i-cache L1 d-cache L1 DC WP L2 cacheable write coalesce * 0 0 0 0 0 N N - N N * 0 0 0 0 1 N N - N Y * 0 0 0 1 0 Y Y - N Y * 0 0 0 1 1 Y Y WT Y Y * 0 0 1 0 0 N N - Y Y * 0 0 1 0 1 N N - N N * 0 0 1 1 0 Y Y - - N * 0 0 1 1 1 Y Y WT Y Y * 0 1 0 0 0 N N - N N * 0 1 0 0 1 N/A N/A N/A N/A N/A * 0 1 0 1 0 N/A N/A N/A N/A N/A * 0 1 0 1 1 N/A N/A N/A N/A N/A * 0 1 1 X X N/A N/A N/A N/A N/A * 1 X 0 0 0 N N - N Y * 1 X 0 0 1 Y N - N Y * 1 X 0 1 0 Y N - N Y * 1 X 0 1 1 Y N - Y Y * 1 X 1 0 0 N N - Y Y * 1 X 1 0 1 Y Y WT Y Y * 1 X 1 1 0 Y Y WT Y Y * 1 X 1 1 1 Y Y WT Y Y */ #endif /* !_MACHINE_PTE_H_ */ /* End of pte.h */