D25237.id.diff

Index: head/sys/powerpc/aim/mmu_oea64.h
===================================================================
--- head/sys/powerpc/aim/mmu_oea64.h
+++ head/sys/powerpc/aim/mmu_oea64.h
@@ -82,12 +82,18 @@
int64_t moea64_pte_unset(struct pvo_entry *);
int64_t moea64_pte_clear(struct pvo_entry *, uint64_t);
int64_t moea64_pte_synch(struct pvo_entry *);
+int64_t moea64_pte_insert_sp(struct pvo_entry *);
+int64_t moea64_pte_unset_sp(struct pvo_entry *);
+int64_t moea64_pte_replace_sp(struct pvo_entry *);
typedef int64_t (*moea64_pte_replace_t)(struct pvo_entry *, int);
typedef int64_t (*moea64_pte_insert_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_unset_t)(struct pvo_entry *);
typedef int64_t (*moea64_pte_clear_t)(struct pvo_entry *, uint64_t);
typedef int64_t (*moea64_pte_synch_t)(struct pvo_entry *);
+typedef int64_t (*moea64_pte_insert_sp_t)(struct pvo_entry *);
+typedef int64_t (*moea64_pte_unset_sp_t)(struct pvo_entry *);
+typedef int64_t (*moea64_pte_replace_sp_t)(struct pvo_entry *);
struct moea64_funcs {
moea64_pte_replace_t pte_replace;
@@ -95,6 +101,9 @@
moea64_pte_unset_t pte_unset;
moea64_pte_clear_t pte_clear;
moea64_pte_synch_t pte_synch;
+ moea64_pte_insert_sp_t pte_insert_sp;
+ moea64_pte_unset_sp_t pte_unset_sp;
+ moea64_pte_replace_sp_t pte_replace_sp;
};
extern struct moea64_funcs *moea64_ops;
@@ -128,5 +137,6 @@
extern u_long moea64_pteg_count;
extern u_long moea64_pteg_mask;
extern int n_slbs;
+extern bool moea64_has_lp_4k_16m;
#endif /* _POWERPC_AIM_MMU_OEA64_H */
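
An aside on the new hooks: the pte_*_sp entries follow the same dispatch pattern as the existing per-page hooks. Each HPT backend (native or phyp) fills its slot in struct moea64_funcs, and generic pmap code calls a wrapper that forwards through moea64_ops (the DEFINE_OEA64_IFUNC wrappers later in this diff). A minimal, self-contained sketch of that ops-table pattern, with hypothetical names, for readers unfamiliar with it:

#include <stdint.h>
#include <stdio.h>

struct pvo_entry;			/* opaque in this sketch */

/* Hypothetical stand-in for struct moea64_funcs. */
struct mmu_funcs {
	int64_t	(*pte_insert_sp)(struct pvo_entry *);
};

static int64_t
native_pte_insert_sp(struct pvo_entry *pvo)
{
	(void)pvo;
	printf("native backend: insert the PTEs of one 16MB superpage\n");
	return (0);
}

static struct mmu_funcs native_funcs = {
	.pte_insert_sp = native_pte_insert_sp,
};
static struct mmu_funcs *mmu_ops = &native_funcs;

/* Generic wrapper, analogous to DEFINE_OEA64_IFUNC(pte_insert_sp, ...). */
static int64_t
pte_insert_sp(struct pvo_entry *pvo)
{
	return (mmu_ops->pte_insert_sp(pvo));
}

int
main(void)
{
	return ((int)pte_insert_sp(NULL));
}
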
Index: head/sys/powerpc/aim/mmu_oea64.c
===================================================================
--- head/sys/powerpc/aim/mmu_oea64.c
+++ head/sys/powerpc/aim/mmu_oea64.c
@@ -83,6 +83,7 @@
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_dumpset.h>
+#include <vm/vm_reserv.h>
#include <vm/uma.h>
#include <machine/_inttypes.h>
@@ -111,9 +112,6 @@
#define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff)
#define VSID_HASH_MASK 0x0000007fffffffffULL
-/* Get physical address from PVO. */
-#define PVO_PADDR(pvo) ((pvo)->pvo_pte.pa & LPTE_RPGN)
-
/*
* Locking semantics:
*
@@ -146,6 +144,48 @@
#define PV_PAGE_UNLOCK(m) PV_UNLOCK(VM_PAGE_TO_PHYS(m))
#define PV_PAGE_LOCKASSERT(m) PV_LOCKASSERT(VM_PAGE_TO_PHYS(m))
+/* Superpage PV lock */
+
+#define PV_LOCK_SIZE (1<<PDRSHIFT)
+
+static __always_inline void
+moea64_sp_pv_lock(vm_paddr_t pa)
+{
+ vm_paddr_t pa_end;
+
+ /* Note: breaking when pa_end is reached to avoid overflows */
+ pa_end = pa + (HPT_SP_SIZE - PV_LOCK_SIZE);
+ for (;;) {
+ mtx_lock_flags(PV_LOCKPTR(pa), MTX_DUPOK);
+ if (pa == pa_end)
+ break;
+ pa += PV_LOCK_SIZE;
+ }
+}
+
+static __always_inline void
+moea64_sp_pv_unlock(vm_paddr_t pa)
+{
+ vm_paddr_t pa_end;
+
+ /* Note: breaking when pa_end is reached to avoid overflows */
+ pa_end = pa;
+ pa += HPT_SP_SIZE - PV_LOCK_SIZE;
+ for (;;) {
+ mtx_unlock_flags(PV_LOCKPTR(pa), MTX_DUPOK);
+ if (pa == pa_end)
+ break;
+ pa -= PV_LOCK_SIZE;
+ }
+}
+
+#define SP_PV_LOCK_ALIGNED(pa) moea64_sp_pv_lock(pa)
+#define SP_PV_UNLOCK_ALIGNED(pa) moea64_sp_pv_unlock(pa)
+#define SP_PV_LOCK(pa) moea64_sp_pv_lock((pa) & ~HPT_SP_MASK)
+#define SP_PV_UNLOCK(pa) moea64_sp_pv_unlock((pa) & ~HPT_SP_MASK)
+#define SP_PV_PAGE_LOCK(m) SP_PV_LOCK(VM_PAGE_TO_PHYS(m))
+#define SP_PV_PAGE_UNLOCK(m) SP_PV_UNLOCK(VM_PAGE_TO_PHYS(m))
+
struct ofw_map {
cell_t om_va;
cell_t om_len;
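
The helpers above take every PV lock covering the 16MB range in ascending order and release them in descending order, using MTX_DUPOK because distinct PV_LOCK_SIZE strides may hash to the same mutex. A standalone sketch of the iteration bounds; PDRSHIFT = 21 (2MB strides) is assumed here purely for illustration and is not taken from this diff:

#include <inttypes.h>
#include <stdio.h>

#define PDRSHIFT	21				/* assumed, illustration only */
#define PV_LOCK_SIZE	(1ULL << PDRSHIFT)
#define HPT_SP_SIZE	(16ULL * 1024 * 1024)		/* 16MB superpage */

int
main(void)
{
	uint64_t base = 0x02000000, pa, pa_end;
	int n = 0;

	/* Same bounds as moea64_sp_pv_lock(): ascend, stop at pa_end inclusive. */
	pa_end = base + (HPT_SP_SIZE - PV_LOCK_SIZE);
	for (pa = base;; pa += PV_LOCK_SIZE) {
		n++;	/* stands in for mtx_lock_flags(PV_LOCKPTR(pa), MTX_DUPOK) */
		if (pa == pa_end)
			break;
	}
	printf("lock strides per superpage: %d (first %#" PRIx64 ", last %#" PRIx64 ")\n",
	    n, base, pa_end);
	/* moea64_sp_pv_unlock() walks the same strides back down to base. */
	return (0);
}
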
@@ -234,6 +274,7 @@
uint64_t moea64_large_page_mask = 0;
uint64_t moea64_large_page_size = 0;
int moea64_large_page_shift = 0;
+bool moea64_has_lp_4k_16m = false;
/*
* PVO calls.
@@ -255,8 +296,97 @@
static void moea64_syncicache(pmap_t pmap, vm_offset_t va,
vm_paddr_t pa, vm_size_t sz);
static void moea64_pmap_init_qpages(void);
+static void moea64_remove_locked(pmap_t, vm_offset_t,
+ vm_offset_t, struct pvo_dlist *);
/*
+ * Superpages data and routines.
+ */
+
+/*
+ * PVO flags (in vaddr) that must match for promotion to succeed.
+ * Note that protection bits are checked separately, as they reside in
+ * another field.
+ */
+#define PVO_FLAGS_PROMOTE (PVO_WIRED | PVO_MANAGED | PVO_PTEGIDX_VALID)
+
+#define PVO_IS_SP(pvo) (((pvo)->pvo_vaddr & PVO_LARGE) && \
+ (pvo)->pvo_pmap != kernel_pmap)
+
+/* Get physical address from PVO. */
+#define PVO_PADDR(pvo) moea64_pvo_paddr(pvo)
+
+/* MD page flag indicating that the page is a superpage. */
+#define MDPG_ATTR_SP 0x40000000
+
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0,
+ "VM/pmap parameters");
+
+static int superpages_enabled = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN,
+ &superpages_enabled, 0, "Enable support for transparent superpages");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, sp, CTLFLAG_RD, 0,
+ "SP page mapping counters");
+
+static u_long sp_demotions;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, demotions, CTLFLAG_RD,
+ &sp_demotions, 0, "SP page demotions");
+
+static u_long sp_mappings;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, mappings, CTLFLAG_RD,
+ &sp_mappings, 0, "SP page mappings");
+
+static u_long sp_p_failures;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_failures, CTLFLAG_RD,
+ &sp_p_failures, 0, "SP page promotion failures");
+
+static u_long sp_p_fail_pa;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_pa, CTLFLAG_RD,
+ &sp_p_fail_pa, 0, "SP page promotion failure: PAs don't match");
+
+static u_long sp_p_fail_flags;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_flags, CTLFLAG_RD,
+ &sp_p_fail_flags, 0, "SP page promotion failure: page flags don't match");
+
+static u_long sp_p_fail_prot;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_prot, CTLFLAG_RD,
+ &sp_p_fail_prot, 0,
+ "SP page promotion failure: page protections don't match");
+
+static u_long sp_p_fail_wimg;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, p_fail_wimg, CTLFLAG_RD,
+ &sp_p_fail_wimg, 0, "SP page promotion failure: WIMG bits don't match");
+
+static u_long sp_promotions;
+SYSCTL_ULONG(_vm_pmap_sp, OID_AUTO, promotions, CTLFLAG_RD,
+ &sp_promotions, 0, "SP page promotions");
+
+static bool moea64_ps_enabled(pmap_t);
+static void moea64_align_superpage(vm_object_t, vm_ooffset_t,
+ vm_offset_t *, vm_size_t);
+
+static int moea64_sp_enter(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind);
+static struct pvo_entry *moea64_sp_remove(struct pvo_entry *sp,
+ struct pvo_dlist *tofree);
+
+static void moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m);
+static void moea64_sp_demote_aligned(struct pvo_entry *sp);
+static void moea64_sp_demote(struct pvo_entry *pvo);
+
+static struct pvo_entry *moea64_sp_unwire(struct pvo_entry *sp);
+static struct pvo_entry *moea64_sp_protect(struct pvo_entry *sp,
+ vm_prot_t prot);
+
+static int64_t moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit);
+static int64_t moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m,
+ uint64_t ptebit);
+
+static __inline bool moea64_sp_pvo_in_range(struct pvo_entry *pvo,
+ vm_offset_t sva, vm_offset_t eva);
+
+/*
* Kernel MMU interface
*/
void moea64_clear_modify(vm_page_t);
@@ -362,6 +492,8 @@
#ifdef __powerpc64__
.page_array_startup = moea64_page_array_startup,
#endif
+ .ps_enabled = moea64_ps_enabled,
+ .align_superpage = moea64_align_superpage,
/* Internal interfaces */
.mapdev = moea64_mapdev,
@@ -381,6 +513,26 @@
MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods);
+/*
+ * Get physical address from PVO.
+ *
+ * For superpages, the lower bits are not stored in pvo_pte.pa and must be
+ * obtained from the VA.
+ */
+static __always_inline vm_paddr_t
+moea64_pvo_paddr(struct pvo_entry *pvo)
+{
+ vm_paddr_t pa;
+
+ pa = (pvo)->pvo_pte.pa & LPTE_RPGN;
+
+ if (PVO_IS_SP(pvo)) {
+ pa &= ~HPT_SP_MASK; /* This is needed to clear LPTE_LP bits. */
+ pa |= PVO_VADDR(pvo) & HPT_SP_MASK;
+ }
+ return (pa);
+}
+
static struct pvo_head *
vm_page_to_pvoh(vm_page_t m)
{
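
For a user superpage PVO, the low bits of pvo_pte.pa hold the LP encoding (LPTE_LP_4K_16M) rather than physical-address bits, which is why moea64_pvo_paddr() above rebuilds the low 24 bits from the VA. A standalone arithmetic sketch; the constants are the ones added to pte.h later in this diff, while the sample addresses and attribute bits are made up:

#include <inttypes.h>
#include <stdio.h>

#define LPTE_RPGN	0xfffffffffffff000ULL
#define LPTE_LP_MASK	0x00000000000ff000ULL
#define LPTE_LP_4K_16M	(0x38ULL << 12)
#define HPT_SP_MASK	((1ULL << 24) - 1)		/* 16MB - 1 */

int
main(void)
{
	/* One 4KB page inside a 16MB superpage. */
	uint64_t va = 0x10563000;		/* PVO_VADDR(pvo), made up */
	uint64_t page_pa = 0x02563000;		/* its physical address, made up */
	uint64_t wimg = 0x10;			/* example attribute bits, made up */

	/* What moea64_sp_enter() stores in pvo_pte.pa for this page. */
	uint64_t stored = (page_pa & ~LPTE_LP_MASK) | LPTE_LP_4K_16M | wimg;

	/* What moea64_pvo_paddr() recovers from it. */
	uint64_t pa = stored & LPTE_RPGN;
	pa &= ~HPT_SP_MASK;			/* clears the LPTE_LP bits */
	pa |= va & HPT_SP_MASK;			/* low 24 bits come from the VA */

	printf("stored pa field: %#" PRIx64 "\n", stored);	/* 0x2538010 */
	printf("recovered PA:    %#" PRIx64 "\n", pa);		/* 0x2563000 */
	return (0);
}
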
@@ -428,8 +580,10 @@
pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
| (vsid << 16);
- shift = (pvo->pvo_vaddr & PVO_LARGE) ? moea64_large_page_shift :
- ADDR_PIDX_SHFT;
+ if (pmap == kernel_pmap && (pvo->pvo_vaddr & PVO_LARGE) != 0)
+ shift = moea64_large_page_shift;
+ else
+ shift = ADDR_PIDX_SHFT;
hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)va & ADDR_PIDX) >> shift);
pvo->pvo_pte.slot = (hash & moea64_pteg_mask) << 3;
}
@@ -773,6 +927,9 @@
vm_paddr_t kernelphysstart, kernelphysend;
int rm_pavail;
+ /* Level 0 reservations consist of 4096 pages (16MB superpage). */
+ vm_level_0_order = 12;
+
#ifndef __powerpc64__
/* We don't have a direct map since there is no BAT */
hw_direct_map = 0;
@@ -1204,6 +1361,17 @@
for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
pvo != NULL && PVO_VADDR(pvo) < eva;
pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ if (PVO_IS_SP(pvo)) {
+ if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
+ pvo = moea64_sp_unwire(pvo);
+ continue;
+ } else {
+ CTR1(KTR_PMAP, "%s: demote before unwire",
+ __func__);
+ moea64_sp_demote(pvo);
+ }
+ }
+
if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
panic("moea64_unwire: pvo %p is missing PVO_WIRED",
pvo);
@@ -1489,10 +1657,11 @@
moea64_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, u_int flags, int8_t psind)
{
- struct pvo_entry *pvo, *oldpvo;
+ struct pvo_entry *pvo, *oldpvo, *tpvo;
struct pvo_head *pvo_head;
uint64_t pte_lo;
int error;
+ vm_paddr_t pa;
if ((m->oflags & VPO_UNMANAGED) == 0) {
if ((flags & PMAP_ENTER_QUICK_LOCKED) == 0)
@@ -1501,14 +1670,18 @@
VM_OBJECT_ASSERT_LOCKED(m->object);
}
+ if (psind > 0)
+ return (moea64_sp_enter(pmap, va, m, prot, flags, psind));
+
pvo = alloc_pvo_entry(0);
if (pvo == NULL)
return (KERN_RESOURCE_SHORTAGE);
pvo->pvo_pmap = NULL; /* to be filled in later */
pvo->pvo_pte.prot = prot;
- pte_lo = moea64_calc_wimg(VM_PAGE_TO_PHYS(m), pmap_page_get_memattr(m));
- pvo->pvo_pte.pa = VM_PAGE_TO_PHYS(m) | pte_lo;
+ pa = VM_PAGE_TO_PHYS(m);
+ pte_lo = moea64_calc_wimg(pa, pmap_page_get_memattr(m));
+ pvo->pvo_pte.pa = pa | pte_lo;
if ((flags & PMAP_ENTER_WIRED) != 0)
pvo->pvo_vaddr |= PVO_WIRED;
@@ -1520,10 +1693,20 @@
pvo->pvo_vaddr |= PVO_MANAGED;
}
- PV_PAGE_LOCK(m);
+ PV_LOCK(pa);
PMAP_LOCK(pmap);
if (pvo->pvo_pmap == NULL)
init_pvo_entry(pvo, pmap, va);
+
+ if (moea64_ps_enabled(pmap) &&
+ (tpvo = moea64_pvo_find_va(pmap, va & ~HPT_SP_MASK)) != NULL &&
+ PVO_IS_SP(tpvo)) {
+ /* Demote SP before entering a regular page */
+ CTR2(KTR_PMAP, "%s: demote before enter: va=%#jx",
+ __func__, (uintmax_t)va);
+ moea64_sp_demote_aligned(tpvo);
+ }
+
if (prot & VM_PROT_WRITE)
if (pmap_bootstrapped &&
(m->oflags & VPO_UNMANAGED) == 0)
@@ -1544,9 +1727,10 @@
}
/* Then just clean up and go home */
- PV_PAGE_UNLOCK(m);
PMAP_UNLOCK(pmap);
+ PV_UNLOCK(pa);
free_pvo_entry(pvo);
+ pvo = NULL;
goto out;
} else {
/* Otherwise, need to kill it first */
@@ -1557,7 +1741,7 @@
}
}
PMAP_UNLOCK(pmap);
- PV_PAGE_UNLOCK(m);
+ PV_UNLOCK(pa);
/* Free any dead pages */
if (error == EEXIST) {
@@ -1573,8 +1757,23 @@
if (pmap != kernel_pmap && (m->a.flags & PGA_EXECUTABLE) == 0 &&
(pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
vm_page_aflag_set(m, PGA_EXECUTABLE);
- moea64_syncicache(pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE);
+ moea64_syncicache(pmap, va, pa, PAGE_SIZE);
}
+
+ /*
+ * Try to promote pages.
+ *
+ * If the VA of the entered page is not aligned with its PA,
+ * don't try page promotion as it is not possible.
+ * This reduces the number of promotion failures dramatically.
+ */
+ if (moea64_ps_enabled(pmap) && pmap != kernel_pmap && pvo != NULL &&
+ (pvo->pvo_vaddr & PVO_MANAGED) != 0 &&
+ (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) &&
+ (m->flags & PG_FICTITIOUS) == 0 &&
+ vm_reserv_level_iffullpop(m) == 0)
+ moea64_sp_promote(pmap, va, m);
+
return (KERN_SUCCESS);
}
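
Promotion after moea64_enter() is only attempted when the new mapping could ever become part of a 16MB page: it must be managed, not fictitious, its level-0 reservation fully populated, and the VA and PA must be congruent modulo the superpage size (same offset within a 16MB frame). A small standalone restatement of that last test, with made-up addresses:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HPT_SP_MASK	((1ULL << 24) - 1)	/* 16MB - 1 */

/* Simplified paraphrase of the (va & HPT_SP_MASK) == (pa & HPT_SP_MASK) check. */
static bool
sp_alignment_ok(uint64_t va, uint64_t pa)
{
	return ((va & HPT_SP_MASK) == (pa & HPT_SP_MASK));
}

int
main(void)
{
	/* Same 0x563000 offset inside the 16MB frame: promotion stays possible. */
	printf("%d\n", sp_alignment_ok(0x10563000, 0x02563000));	/* 1 */
	/* Offsets differ: this VA range can never be covered by one 16MB PTE. */
	printf("%d\n", sp_alignment_ok(0x10563000, 0x02564000));	/* 0 */
	return (0);
}
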
@@ -1633,15 +1832,25 @@
{
vm_page_t m;
vm_pindex_t diff, psize;
+ vm_offset_t va;
+ int8_t psind;
VM_OBJECT_ASSERT_LOCKED(m_start->object);
psize = atop(end - start);
m = m_start;
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
- moea64_enter(pm, start + ptoa(diff), m, prot &
- (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP |
- PMAP_ENTER_QUICK_LOCKED, 0);
+ va = start + ptoa(diff);
+ if ((va & HPT_SP_MASK) == 0 && va + HPT_SP_SIZE <= end &&
+ m->psind == 1 && moea64_ps_enabled(pm))
+ psind = 1;
+ else
+ psind = 0;
+ moea64_enter(pm, va, m, prot &
+ (VM_PROT_READ | VM_PROT_EXECUTE),
+ PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, psind);
+ if (psind == 1)
+ m = &m[HPT_SP_SIZE / PAGE_SIZE - 1];
m = TAILQ_NEXT(m, listq);
}
}
@@ -1755,6 +1964,27 @@
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
UMA_ZONE_VM | UMA_ZONE_NOFREE);
+ /*
+ * Are large page mappings enabled?
+ */
+ TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);
+ if (superpages_enabled) {
+ KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
+ ("moea64_init: can't assign to pagesizes[1]"));
+
+ if (moea64_large_page_size == 0) {
+ printf("mmu_oea64: HW does not support large pages. "
+ "Disabling superpages...\n");
+ superpages_enabled = 0;
+ } else if (!moea64_has_lp_4k_16m) {
+ printf("mmu_oea64: "
+ "HW does not support mixed 4KB/16MB page sizes. "
+ "Disabling superpages...\n");
+ superpages_enabled = 0;
+ } else
+ pagesizes[1] = HPT_SP_SIZE;
+ }
+
if (!hw_direct_map) {
uma_zone_set_allocf(moea64_pvo_zone, moea64_uma_page_alloc);
}
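
Superpage support stays disabled unless the vm.pmap.superpages_enabled tunable is set; it is fetched with TUNABLE_INT_FETCH and exported read-only (CTLFLAG_RDTUN), so it has to be set at boot time, and even then moea64_init() above only publishes pagesizes[1] = 16MB when the hardware advertises the mixed 4KB/16MB encoding. To opt in on a system carrying this change:

# /boot/loader.conf
vm.pmap.superpages_enabled=1
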
@@ -1834,7 +2064,7 @@
vm_page_assert_busied(m);
if (!pmap_page_is_write_mapped(m))
- return
+ return;
powerpc_sync();
PV_PAGE_LOCK(m);
@@ -1844,6 +2074,11 @@
PMAP_LOCK(pmap);
if (!(pvo->pvo_vaddr & PVO_DEAD) &&
(pvo->pvo_pte.prot & VM_PROT_WRITE)) {
+ if (PVO_IS_SP(pvo)) {
+ CTR1(KTR_PMAP, "%s: demote before remwr",
+ __func__);
+ moea64_sp_demote(pvo);
+ }
pvo->pvo_pte.prot &= ~VM_PROT_WRITE;
ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
if (ret < 0)
@@ -1892,6 +2127,9 @@
pmap_t pmap;
uint64_t lo;
+ CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x",
+ __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma);
+
if ((m->oflags & VPO_UNMANAGED) != 0) {
m->md.mdpg_cache_attrs = ma;
return;
@@ -1904,6 +2142,11 @@
pmap = pvo->pvo_pmap;
PMAP_LOCK(pmap);
if (!(pvo->pvo_vaddr & PVO_DEAD)) {
+ if (PVO_IS_SP(pvo)) {
+ CTR1(KTR_PMAP,
+ "%s: demote before set_memattr", __func__);
+ moea64_sp_demote(pvo);
+ }
pvo->pvo_pte.pa &= ~LPTE_WIMG;
pvo->pvo_pte.pa |= lo;
refchg = moea64_pte_replace(pvo, MOEA64_PTE_INVALIDATE);
@@ -2356,7 +2599,7 @@
moea64_protect(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
vm_prot_t prot)
{
- struct pvo_entry *pvo, *tpvo, key;
+ struct pvo_entry *pvo, key;
CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm,
sva, eva, prot);
@@ -2372,8 +2615,18 @@
PMAP_LOCK(pm);
key.pvo_vaddr = sva;
for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
- pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
- tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
+ pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ if (PVO_IS_SP(pvo)) {
+ if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
+ pvo = moea64_sp_protect(pvo, prot);
+ continue;
+ } else {
+ CTR1(KTR_PMAP, "%s: demote before protect",
+ __func__);
+ moea64_sp_demote(pvo);
+ }
+ }
moea64_pvo_protect(pm, pvo, prot);
}
PMAP_UNLOCK(pm);
@@ -2473,13 +2726,46 @@
}
}
+static void
+moea64_remove_locked(pmap_t pm, vm_offset_t sva, vm_offset_t eva,
+ struct pvo_dlist *tofree)
+{
+ struct pvo_entry *pvo, *tpvo, key;
+
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+
+ key.pvo_vaddr = sva;
+ for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
+ pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
+ if (PVO_IS_SP(pvo)) {
+ if (moea64_sp_pvo_in_range(pvo, sva, eva)) {
+ tpvo = moea64_sp_remove(pvo, tofree);
+ continue;
+ } else {
+ CTR1(KTR_PMAP, "%s: demote before remove",
+ __func__);
+ moea64_sp_demote(pvo);
+ }
+ }
+ tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
+
+ /*
+ * For locking reasons, remove this from the page table and
+ * pmap, but save delinking from the vm_page for a second
+ * pass
+ */
+ moea64_pvo_remove_from_pmap(pvo);
+ SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
+ }
+}
+
/*
* Remove the given range of addresses from the specified map.
*/
void
moea64_remove(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
{
- struct pvo_entry *pvo, *tpvo, key;
+ struct pvo_entry *pvo;
struct pvo_dlist tofree;
/*
@@ -2488,23 +2774,9 @@
if (pm->pm_stats.resident_count == 0)
return;
- key.pvo_vaddr = sva;
-
SLIST_INIT(&tofree);
-
PMAP_LOCK(pm);
- for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key);
- pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
- tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
-
- /*
- * For locking reasons, remove this from the page table and
- * pmap, but save delinking from the vm_page for a second
- * pass
- */
- moea64_pvo_remove_from_pmap(pvo);
- SLIST_INSERT_HEAD(&tofree, pvo, pvo_dlink);
- }
+ moea64_remove_locked(pm, sva, eva, &tofree);
PMAP_UNLOCK(pm);
while (!SLIST_EMPTY(&tofree)) {
@@ -2534,8 +2806,14 @@
pmap = pvo->pvo_pmap;
PMAP_LOCK(pmap);
wasdead = (pvo->pvo_vaddr & PVO_DEAD);
- if (!wasdead)
+ if (!wasdead) {
+ if (PVO_IS_SP(pvo)) {
+ CTR1(KTR_PMAP, "%s: demote before remove_all",
+ __func__);
+ moea64_sp_demote(pvo);
+ }
moea64_pvo_remove_from_pmap(pvo);
+ }
moea64_pvo_remove_from_page_locked(pvo, m);
if (!wasdead)
LIST_INSERT_HEAD(&freequeue, pvo, pvo_vlink);
@@ -2768,11 +3046,17 @@
struct pvo_entry *pvo;
int64_t ret;
boolean_t rv;
+ vm_page_t sp;
/*
* See if this bit is stored in the page already.
+ *
+ * For superpages, the bit is stored in the first vm page.
*/
- if (m->md.mdpg_attrs & ptebit)
+ if ((m->md.mdpg_attrs & ptebit) != 0 ||
+ ((sp = PHYS_TO_VM_PAGE(VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK)) != NULL &&
+ (sp->md.mdpg_attrs & (ptebit | MDPG_ATTR_SP)) ==
+ (ptebit | MDPG_ATTR_SP)))
return (TRUE);
/*
@@ -2783,6 +3067,21 @@
powerpc_sync();
PV_PAGE_LOCK(m);
LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
+ if (PVO_IS_SP(pvo)) {
+ ret = moea64_sp_query(pvo, ptebit);
+ /*
+ * If SP was not demoted, check its REF/CHG bits here.
+ */
+ if (ret != -1) {
+ if ((ret & ptebit) != 0) {
+ rv = TRUE;
+ break;
+ }
+ continue;
+ }
+ /* else, fallthrough */
+ }
+
ret = 0;
/*
@@ -2828,6 +3127,12 @@
count = 0;
PV_PAGE_LOCK(m);
LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) {
+ if (PVO_IS_SP(pvo)) {
+ if ((ret = moea64_sp_clear(pvo, m, ptebit)) != -1) {
+ count += ret;
+ continue;
+ }
+ }
ret = 0;
PMAP_LOCK(pvo->pvo_pmap);
@@ -3231,3 +3536,770 @@
DEFINE_OEA64_IFUNC(int64_t, pte_clear, (struct pvo_entry *, uint64_t),
moea64_null_method)
DEFINE_OEA64_IFUNC(int64_t, pte_synch, (struct pvo_entry *), moea64_null_method)
+DEFINE_OEA64_IFUNC(int64_t, pte_insert_sp, (struct pvo_entry *), moea64_null_method)
+DEFINE_OEA64_IFUNC(int64_t, pte_unset_sp, (struct pvo_entry *), moea64_null_method)
+DEFINE_OEA64_IFUNC(int64_t, pte_replace_sp, (struct pvo_entry *), moea64_null_method)
+
+/* Superpage functions */
+
+/* MMU interface */
+
+static bool
+moea64_ps_enabled(pmap_t pmap)
+{
+ return (superpages_enabled);
+}
+
+static void
+moea64_align_superpage(vm_object_t object, vm_ooffset_t offset,
+ vm_offset_t *addr, vm_size_t size)
+{
+ vm_offset_t sp_offset;
+
+ if (size < HPT_SP_SIZE)
+ return;
+
+ CTR4(KTR_PMAP, "%s: offs=%#jx, addr=%p, size=%#jx",
+ __func__, (uintmax_t)offset, addr, (uintmax_t)size);
+
+ if (object != NULL && (object->flags & OBJ_COLORED) != 0)
+ offset += ptoa(object->pg_color);
+ sp_offset = offset & HPT_SP_MASK;
+ if (size - ((HPT_SP_SIZE - sp_offset) & HPT_SP_MASK) < HPT_SP_SIZE ||
+ (*addr & HPT_SP_MASK) == sp_offset)
+ return;
+ if ((*addr & HPT_SP_MASK) < sp_offset)
+ *addr = (*addr & ~HPT_SP_MASK) + sp_offset;
+ else
+ *addr = ((*addr + HPT_SP_MASK) & ~HPT_SP_MASK) + sp_offset;
+}
+
+/* Helpers */
+
+static __inline void
+moea64_pvo_cleanup(struct pvo_dlist *tofree)
+{
+ struct pvo_entry *pvo;
+
+ /* clean up */
+ while (!SLIST_EMPTY(tofree)) {
+ pvo = SLIST_FIRST(tofree);
+ SLIST_REMOVE_HEAD(tofree, pvo_dlink);
+ if (pvo->pvo_vaddr & PVO_DEAD)
+ moea64_pvo_remove_from_page(pvo);
+ free_pvo_entry(pvo);
+ }
+}
+
+static __inline uint16_t
+pvo_to_vmpage_flags(struct pvo_entry *pvo)
+{
+ uint16_t flags;
+
+ flags = 0;
+ if ((pvo->pvo_pte.prot & VM_PROT_WRITE) != 0)
+ flags |= PGA_WRITEABLE;
+ if ((pvo->pvo_pte.prot & VM_PROT_EXECUTE) != 0)
+ flags |= PGA_EXECUTABLE;
+
+ return (flags);
+}
+
+/*
+ * Check if the given pvo and its superpage are in sva-eva range.
+ */
+static __inline bool
+moea64_sp_pvo_in_range(struct pvo_entry *pvo, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t spva;
+
+ spva = PVO_VADDR(pvo) & ~HPT_SP_MASK;
+ if (spva >= sva && spva + HPT_SP_SIZE <= eva) {
+ /*
+ * Because this function is intended to be called from loops
+ * that iterate over ordered pvo entries, if the condition
+ * above is true then the pvo must be the first of its
+ * superpage.
+ */
+ KASSERT(PVO_VADDR(pvo) == spva,
+ ("%s: unexpected unaligned superpage pvo", __func__));
+ return (true);
+ }
+ return (false);
+}
+
+/*
+ * Update vm about the REF/CHG bits if the superpage is managed and
+ * has (or had) write access.
+ */
+static void
+moea64_sp_refchg_process(struct pvo_entry *sp, vm_page_t m,
+ int64_t sp_refchg, vm_prot_t prot)
+{
+ vm_page_t m_end;
+ int64_t refchg;
+
+ if ((sp->pvo_vaddr & PVO_MANAGED) != 0 && (prot & VM_PROT_WRITE) != 0) {
+ for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++) {
+ refchg = sp_refchg |
+ atomic_readandclear_32(&m->md.mdpg_attrs);
+ if (refchg & LPTE_CHG)
+ vm_page_dirty(m);
+ if (refchg & LPTE_REF)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ }
+ }
+}
+
+/* Superpage ops */
+
+static int
+moea64_sp_enter(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot, u_int flags, int8_t psind)
+{
+ struct pvo_entry *pvo, **pvos;
+ struct pvo_head *pvo_head;
+ vm_offset_t sva;
+ vm_page_t sm;
+ vm_paddr_t pa, spa;
+ bool sync;
+ struct pvo_dlist tofree;
+ int error, i;
+ uint16_t aflags;
+
+ KASSERT((va & HPT_SP_MASK) == 0, ("%s: va %#jx unaligned",
+ __func__, (uintmax_t)va));
+ KASSERT(psind == 1, ("%s: invalid psind: %d", __func__, psind));
+ KASSERT(m->psind == 1, ("%s: invalid m->psind: %d",
+ __func__, m->psind));
+ KASSERT(pmap != kernel_pmap,
+ ("%s: function called with kernel pmap", __func__));
+
+ CTR5(KTR_PMAP, "%s: va=%#jx, pa=%#jx, prot=%#x, flags=%#x, psind=1",
+ __func__, (uintmax_t)va, (uintmax_t)VM_PAGE_TO_PHYS(m),
+ prot, flags);
+
+ SLIST_INIT(&tofree);
+
+ sva = va;
+ sm = m;
+ spa = pa = VM_PAGE_TO_PHYS(sm);
+
+ /* Try to allocate all PVOs first, to make failure handling easier. */
+ pvos = malloc(HPT_SP_PAGES * sizeof(struct pvo_entry *), M_TEMP,
+ M_NOWAIT);
+ if (pvos == NULL) {
+ CTR1(KTR_PMAP, "%s: failed to alloc pvo array", __func__);
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+
+ for (i = 0; i < HPT_SP_PAGES; i++) {
+ pvos[i] = alloc_pvo_entry(0);
+ if (pvos[i] == NULL) {
+ CTR1(KTR_PMAP, "%s: failed to alloc pvo", __func__);
+ for (i = i - 1; i >= 0; i--)
+ free_pvo_entry(pvos[i]);
+ free(pvos, M_TEMP);
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+ }
+
+ SP_PV_LOCK_ALIGNED(spa);
+ PMAP_LOCK(pmap);
+
+ /* Note: moea64_remove_locked() also clears cached REF/CHG bits. */
+ moea64_remove_locked(pmap, va, va + HPT_SP_SIZE, &tofree);
+
+ /* Enter pages */
+ for (i = 0; i < HPT_SP_PAGES;
+ i++, va += PAGE_SIZE, pa += PAGE_SIZE, m++) {
+ pvo = pvos[i];
+
+ pvo->pvo_pte.prot = prot;
+ pvo->pvo_pte.pa = (pa & ~LPTE_LP_MASK) | LPTE_LP_4K_16M |
+ moea64_calc_wimg(pa, pmap_page_get_memattr(m));
+
+ if ((flags & PMAP_ENTER_WIRED) != 0)
+ pvo->pvo_vaddr |= PVO_WIRED;
+ pvo->pvo_vaddr |= PVO_LARGE;
+
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ pvo_head = NULL;
+ else {
+ pvo_head = &m->md.mdpg_pvoh;
+ pvo->pvo_vaddr |= PVO_MANAGED;
+ }
+
+ init_pvo_entry(pvo, pmap, va);
+
+ error = moea64_pvo_enter(pvo, pvo_head, NULL);
+ /*
+ * All superpage PVOs were previously removed, so no errors
+ * should occur while inserting the new ones.
+ */
+ KASSERT(error == 0, ("%s: unexpected error "
+ "when inserting superpage PVO: %d",
+ __func__, error));
+ }
+
+ PMAP_UNLOCK(pmap);
+ SP_PV_UNLOCK_ALIGNED(spa);
+
+ sync = (sm->a.flags & PGA_EXECUTABLE) == 0;
+ /* Note: moea64_pvo_cleanup() also clears page prot. flags. */
+ moea64_pvo_cleanup(&tofree);
+ pvo = pvos[0];
+
+ /* Set vm page flags */
+ aflags = pvo_to_vmpage_flags(pvo);
+ if (aflags != 0)
+ for (m = sm; m < &sm[HPT_SP_PAGES]; m++)
+ vm_page_aflag_set(m, aflags);
+
+ /*
+ * Flush the page from the instruction cache if this page is
+ * mapped executable and cacheable.
+ */
+ if (sync && (pvo->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0)
+ moea64_syncicache(pmap, sva, spa, HPT_SP_SIZE);
+
+ atomic_add_long(&sp_mappings, 1);
+ CTR3(KTR_PMAP, "%s: SP success for va %#jx in pmap %p",
+ __func__, (uintmax_t)sva, pmap);
+
+ free(pvos, M_TEMP);
+ return (KERN_SUCCESS);
+}
+
+static void
+moea64_sp_promote(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+ struct pvo_entry *first, *pvo;
+ vm_paddr_t pa, pa_end;
+ vm_offset_t sva, va_end;
+ int64_t sp_refchg;
+
+ /* This CTR may generate a lot of output. */
+ /* CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)va); */
+
+ va &= ~HPT_SP_MASK;
+ sva = va;
+ /* Get superpage */
+ pa = VM_PAGE_TO_PHYS(m) & ~HPT_SP_MASK;
+ m = PHYS_TO_VM_PAGE(pa);
+
+ PMAP_LOCK(pmap);
+
+ /*
+ * Check if all pages meet promotion criteria.
+ *
+ * XXX In some cases the loop below may be executed for all or most
+ * of the entered pages of a superpage, which can be expensive
+ * (although it was not profiled) and may need some optimization.
+ *
+ * Some cases where this seems to happen are:
+ * - When a superpage is first entered read-only and later becomes
+ * read-write.
+ * - When some of the superpage's virtual addresses map to previously
+ * wired/cached pages while others map to pages allocated from a
+ * different physical address range. A common scenario where this
+ * happens is when mmap'ing a file that is already present in FS
+ * block cache and doesn't fill a superpage.
+ */
+ first = pvo = moea64_pvo_find_va(pmap, sva);
+ for (pa_end = pa + HPT_SP_SIZE;
+ pa < pa_end; pa += PAGE_SIZE, va += PAGE_SIZE) {
+ if (pvo == NULL || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
+ CTR3(KTR_PMAP,
+ "%s: NULL or dead PVO: pmap=%p, va=%#jx",
+ __func__, pmap, (uintmax_t)va);
+ goto error;
+ }
+ if (PVO_PADDR(pvo) != pa) {
+ CTR5(KTR_PMAP, "%s: PAs don't match: "
+ "pmap=%p, va=%#jx, pvo_pa=%#jx, exp_pa=%#jx",
+ __func__, pmap, (uintmax_t)va,
+ (uintmax_t)PVO_PADDR(pvo), (uintmax_t)pa);
+ atomic_add_long(&sp_p_fail_pa, 1);
+ goto error;
+ }
+ if ((first->pvo_vaddr & PVO_FLAGS_PROMOTE) !=
+ (pvo->pvo_vaddr & PVO_FLAGS_PROMOTE)) {
+ CTR5(KTR_PMAP, "%s: PVO flags don't match: "
+ "pmap=%p, va=%#jx, pvo_flags=%#jx, exp_flags=%#jx",
+ __func__, pmap, (uintmax_t)va,
+ (uintmax_t)(pvo->pvo_vaddr & PVO_FLAGS_PROMOTE),
+ (uintmax_t)(first->pvo_vaddr & PVO_FLAGS_PROMOTE));
+ atomic_add_long(&sp_p_fail_flags, 1);
+ goto error;
+ }
+ if (first->pvo_pte.prot != pvo->pvo_pte.prot) {
+ CTR5(KTR_PMAP, "%s: PVO protections don't match: "
+ "pmap=%p, va=%#jx, pvo_prot=%#x, exp_prot=%#x",
+ __func__, pmap, (uintmax_t)va,
+ pvo->pvo_pte.prot, first->pvo_pte.prot);
+ atomic_add_long(&sp_p_fail_prot, 1);
+ goto error;
+ }
+ if ((first->pvo_pte.pa & LPTE_WIMG) !=
+ (pvo->pvo_pte.pa & LPTE_WIMG)) {
+ CTR5(KTR_PMAP, "%s: WIMG bits don't match: "
+ "pmap=%p, va=%#jx, pvo_wimg=%#jx, exp_wimg=%#jx",
+ __func__, pmap, (uintmax_t)va,
+ (uintmax_t)(pvo->pvo_pte.pa & LPTE_WIMG),
+ (uintmax_t)(first->pvo_pte.pa & LPTE_WIMG));
+ atomic_add_long(&sp_p_fail_wimg, 1);
+ goto error;
+ }
+
+ pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo);
+ }
+
+ /* All OK, promote. */
+
+ /*
+ * Handle superpage REF/CHG bits. If REF or CHG is set in
+ * any page, then it must be set in the superpage.
+ *
+ * Instead of querying each page, we take advantage of two facts:
+ * 1- If a page is being promoted, it was referenced.
+ * 2- If promoted pages are writable, they were modified.
+ */
+ sp_refchg = LPTE_REF |
+ ((first->pvo_pte.prot & VM_PROT_WRITE) != 0 ? LPTE_CHG : 0);
+
+ /* Promote pages */
+
+ for (pvo = first, va_end = PVO_VADDR(pvo) + HPT_SP_SIZE;
+ pvo != NULL && PVO_VADDR(pvo) < va_end;
+ pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
+ pvo->pvo_pte.pa &= ~LPTE_LP_MASK;
+ pvo->pvo_pte.pa |= LPTE_LP_4K_16M;
+ pvo->pvo_vaddr |= PVO_LARGE;
+ }
+ moea64_pte_replace_sp(first);
+
+ /* Send REF/CHG bits to VM */
+ moea64_sp_refchg_process(first, m, sp_refchg, first->pvo_pte.prot);
+
+ /* Use first page to cache REF/CHG bits */
+ atomic_set_32(&m->md.mdpg_attrs, sp_refchg | MDPG_ATTR_SP);
+
+ PMAP_UNLOCK(pmap);
+
+ atomic_add_long(&sp_mappings, 1);
+ atomic_add_long(&sp_promotions, 1);
+ CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
+ __func__, (uintmax_t)sva, pmap);
+ return;
+
+error:
+ atomic_add_long(&sp_p_failures, 1);
+ PMAP_UNLOCK(pmap);
+}
+
+static void
+moea64_sp_demote_aligned(struct pvo_entry *sp)
+{
+ struct pvo_entry *pvo;
+ vm_offset_t va, va_end;
+ vm_paddr_t pa;
+ vm_page_t m;
+ pmap_t pmap;
+ int64_t refchg;
+
+ CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
+
+ pmap = sp->pvo_pmap;
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ pvo = sp;
+
+ /* Demote pages */
+
+ va = PVO_VADDR(pvo);
+ pa = PVO_PADDR(pvo);
+ m = PHYS_TO_VM_PAGE(pa);
+
+ for (pvo = sp, va_end = va + HPT_SP_SIZE;
+ pvo != NULL && PVO_VADDR(pvo) < va_end;
+ pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo),
+ va += PAGE_SIZE, pa += PAGE_SIZE) {
+ KASSERT(pvo && PVO_VADDR(pvo) == va,
+ ("%s: missing PVO for va %#jx", __func__, (uintmax_t)va));
+
+ pvo->pvo_vaddr &= ~PVO_LARGE;
+ pvo->pvo_pte.pa &= ~LPTE_RPGN;
+ pvo->pvo_pte.pa |= pa;
+
+ }
+ refchg = moea64_pte_replace_sp(sp);
+
+ /*
+ * Clear SP flag
+ *
+ * XXX It is possible that another pmap has this page mapped as
+ * part of a superpage, but as the SP flag is used only for
+ * caching SP REF/CHG bits, which will be queried if not found
+ * in the cache, it should be ok to clear it here.
+ */
+ atomic_clear_32(&m->md.mdpg_attrs, MDPG_ATTR_SP);
+
+ /*
+ * Handle superpage REF/CHG bits. A bit set in the superpage
+ * means all pages should consider it set.
+ */
+ moea64_sp_refchg_process(sp, m, refchg, sp->pvo_pte.prot);
+
+ atomic_add_long(&sp_demotions, 1);
+ CTR3(KTR_PMAP, "%s: success for va %#jx in pmap %p",
+ __func__, (uintmax_t)PVO_VADDR(sp), pmap);
+}
+
+static void
+moea64_sp_demote(struct pvo_entry *pvo)
+{
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+
+ if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
+ pvo = moea64_pvo_find_va(pvo->pvo_pmap,
+ PVO_VADDR(pvo) & ~HPT_SP_MASK);
+ KASSERT(pvo != NULL, ("%s: missing PVO for va %#jx",
+ __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
+ }
+ moea64_sp_demote_aligned(pvo);
+}
+
+static struct pvo_entry *
+moea64_sp_unwire(struct pvo_entry *sp)
+{
+ struct pvo_entry *pvo, *prev;
+ vm_offset_t eva;
+ pmap_t pm;
+ int64_t ret, refchg;
+
+ CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
+
+ pm = sp->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+
+ eva = PVO_VADDR(sp) + HPT_SP_SIZE;
+ refchg = 0;
+ for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
+ prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
+ panic("%s: pvo %p is missing PVO_WIRED",
+ __func__, pvo);
+ pvo->pvo_vaddr &= ~PVO_WIRED;
+
+ ret = moea64_pte_replace(pvo, 0 /* No invalidation */);
+ if (ret < 0)
+ refchg |= LPTE_CHG;
+ else
+ refchg |= ret;
+
+ pm->pm_stats.wired_count--;
+ }
+
+ /* Send REF/CHG bits to VM */
+ moea64_sp_refchg_process(sp, PHYS_TO_VM_PAGE(PVO_PADDR(sp)),
+ refchg, sp->pvo_pte.prot);
+
+ return (prev);
+}
+
+static struct pvo_entry *
+moea64_sp_protect(struct pvo_entry *sp, vm_prot_t prot)
+{
+ struct pvo_entry *pvo, *prev;
+ vm_offset_t eva;
+ pmap_t pm;
+ vm_page_t m, m_end;
+ int64_t ret, refchg;
+ vm_prot_t oldprot;
+
+ CTR3(KTR_PMAP, "%s: va=%#jx, prot=%x",
+ __func__, (uintmax_t)PVO_VADDR(sp), prot);
+
+ pm = sp->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+
+ oldprot = sp->pvo_pte.prot;
+ m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
+ KASSERT(m != NULL, ("%s: missing vm page for pa %#jx",
+ __func__, (uintmax_t)PVO_PADDR(sp)));
+ eva = PVO_VADDR(sp) + HPT_SP_SIZE;
+ refchg = 0;
+
+ for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
+ prev = pvo, pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ pvo->pvo_pte.prot = prot;
+ /*
+ * If the PVO is in the page table, update mapping
+ */
+ ret = moea64_pte_replace(pvo, MOEA64_PTE_PROT_UPDATE);
+ if (ret < 0)
+ refchg |= LPTE_CHG;
+ else
+ refchg |= ret;
+ }
+
+ /* Send REF/CHG bits to VM */
+ moea64_sp_refchg_process(sp, m, refchg, oldprot);
+
+ /* Handle pages that became executable */
+ if ((m->a.flags & PGA_EXECUTABLE) == 0 &&
+ (sp->pvo_pte.pa & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
+ if ((m->oflags & VPO_UNMANAGED) == 0)
+ for (m_end = &m[HPT_SP_PAGES]; m < m_end; m++)
+ vm_page_aflag_set(m, PGA_EXECUTABLE);
+ moea64_syncicache(pm, PVO_VADDR(sp), PVO_PADDR(sp),
+ HPT_SP_SIZE);
+ }
+
+ return (prev);
+}
+
+static struct pvo_entry *
+moea64_sp_remove(struct pvo_entry *sp, struct pvo_dlist *tofree)
+{
+ struct pvo_entry *pvo, *tpvo;
+ vm_offset_t eva;
+ pmap_t pm;
+
+ CTR2(KTR_PMAP, "%s: va=%#jx", __func__, (uintmax_t)PVO_VADDR(sp));
+
+ pm = sp->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+
+ eva = PVO_VADDR(sp) + HPT_SP_SIZE;
+ for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva; pvo = tpvo) {
+ tpvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo);
+
+ /*
+ * For locking reasons, remove this from the page table and
+ * pmap, but save delinking from the vm_page for a second
+ * pass
+ */
+ moea64_pvo_remove_from_pmap(pvo);
+ SLIST_INSERT_HEAD(tofree, pvo, pvo_dlink);
+ }
+
+ /*
+ * Clear SP bit
+ *
+ * XXX See comment in moea64_sp_demote_aligned() for why it's
+ * ok to always clear the SP bit on remove/demote.
+ */
+ atomic_clear_32(&PHYS_TO_VM_PAGE(PVO_PADDR(sp))->md.mdpg_attrs,
+ MDPG_ATTR_SP);
+
+ return (tpvo);
+}
+
+static int64_t
+moea64_sp_query_locked(struct pvo_entry *pvo, uint64_t ptebit)
+{
+ int64_t refchg, ret;
+ vm_offset_t eva;
+ vm_page_t m;
+ pmap_t pmap;
+ struct pvo_entry *sp;
+
+ pmap = pvo->pvo_pmap;
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ /* Get first SP PVO */
+ if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
+ sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
+ KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
+ __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
+ } else
+ sp = pvo;
+ eva = PVO_VADDR(sp) + HPT_SP_SIZE;
+
+ refchg = 0;
+ for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
+ ret = moea64_pte_synch(pvo);
+ if (ret > 0) {
+ refchg |= ret & (LPTE_CHG | LPTE_REF);
+ if ((refchg & ptebit) != 0)
+ break;
+ }
+ }
+
+ /* Save results */
+ if (refchg != 0) {
+ m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
+ atomic_set_32(&m->md.mdpg_attrs, refchg | MDPG_ATTR_SP);
+ }
+
+ return (refchg);
+}
+
+static int64_t
+moea64_sp_query(struct pvo_entry *pvo, uint64_t ptebit)
+{
+ int64_t refchg;
+ pmap_t pmap;
+
+ pmap = pvo->pvo_pmap;
+ PMAP_LOCK(pmap);
+
+ /*
+ * Check if SP was demoted/removed before pmap lock was acquired.
+ */
+ if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
+ CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
+ __func__, (uintmax_t)PVO_PADDR(pvo));
+ PMAP_UNLOCK(pmap);
+ return (-1);
+ }
+
+ refchg = moea64_sp_query_locked(pvo, ptebit);
+ PMAP_UNLOCK(pmap);
+
+ CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
+ __func__, (uintmax_t)PVO_VADDR(pvo),
+ (uintmax_t)PVO_PADDR(pvo), (uintmax_t)refchg);
+
+ return (refchg);
+}
+
+static int64_t
+moea64_sp_pvo_clear(struct pvo_entry *pvo, uint64_t ptebit)
+{
+ int64_t refchg, ret;
+ pmap_t pmap;
+ struct pvo_entry *sp;
+ vm_offset_t eva;
+ vm_page_t m;
+
+ pmap = pvo->pvo_pmap;
+ PMAP_LOCK(pmap);
+
+ /*
+ * Check if SP was demoted/removed before pmap lock was acquired.
+ */
+ if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
+ CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
+ __func__, (uintmax_t)PVO_PADDR(pvo));
+ PMAP_UNLOCK(pmap);
+ return (-1);
+ }
+
+ /* Get first SP PVO */
+ if ((PVO_VADDR(pvo) & HPT_SP_MASK) != 0) {
+ sp = moea64_pvo_find_va(pmap, PVO_VADDR(pvo) & ~HPT_SP_MASK);
+ KASSERT(sp != NULL, ("%s: missing PVO for va %#jx",
+ __func__, (uintmax_t)(PVO_VADDR(pvo) & ~HPT_SP_MASK)));
+ } else
+ sp = pvo;
+ eva = PVO_VADDR(sp) + HPT_SP_SIZE;
+
+ refchg = 0;
+ for (pvo = sp; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pmap->pmap_pvo, pvo)) {
+ ret = moea64_pte_clear(pvo, ptebit);
+ if (ret > 0)
+ refchg |= ret & (LPTE_CHG | LPTE_REF);
+ }
+
+ m = PHYS_TO_VM_PAGE(PVO_PADDR(sp));
+ atomic_clear_32(&m->md.mdpg_attrs, ptebit);
+ PMAP_UNLOCK(pmap);
+
+ CTR4(KTR_PMAP, "%s: va=%#jx, pa=%#jx: refchg=%#jx",
+ __func__, (uintmax_t)PVO_VADDR(sp),
+ (uintmax_t)PVO_PADDR(sp), (uintmax_t)refchg);
+
+ return (refchg);
+}
+
+static int64_t
+moea64_sp_clear(struct pvo_entry *pvo, vm_page_t m, uint64_t ptebit)
+{
+ int64_t count, ret;
+ pmap_t pmap;
+
+ count = 0;
+ pmap = pvo->pvo_pmap;
+
+ /*
+ * Since this reference bit is shared by 4096 4KB pages, it
+ * should not be cleared every time it is tested. Apply a
+ * simple "hash" function on the physical page number, the
+ * virtual superpage number, and the pmap address to select
+ * one 4KB page out of the 4096 on which testing the
+ * reference bit will result in clearing that reference bit.
+ * This function is designed to avoid the selection of the
+ * same 4KB page for every 16MB page mapping.
+ *
+ * Always leave the reference bit of a wired mapping set, as
+ * the current state of its reference bit won't affect page
+ * replacement.
+ */
+ if (ptebit == LPTE_REF && (((VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) ^
+ (PVO_VADDR(pvo) >> HPT_SP_SHIFT) ^ (uintptr_t)pmap) &
+ (HPT_SP_PAGES - 1)) == 0 && (pvo->pvo_vaddr & PVO_WIRED) == 0) {
+ if ((ret = moea64_sp_pvo_clear(pvo, ptebit)) == -1)
+ return (-1);
+
+ if ((ret & ptebit) != 0)
+ count++;
+
+ /*
+ * If this page was not selected by the hash function, then assume
+ * its REF bit was set.
+ */
+ } else if (ptebit == LPTE_REF) {
+ count++;
+
+ /*
+ * To clear the CHG bit of a single SP page, the SP must first be
+ * demoted. But if no CHG bit is set, there is nothing to clear and
+ * thus no SP demotion is needed.
+ */
+ } else {
+ CTR4(KTR_PMAP, "%s: ptebit=%#jx, va=%#jx, pa=%#jx",
+ __func__, (uintmax_t)ptebit, (uintmax_t)PVO_VADDR(pvo),
+ (uintmax_t)PVO_PADDR(pvo));
+
+ PMAP_LOCK(pmap);
+
+ /*
+ * Make sure SP wasn't demoted/removed before pmap lock
+ * was acquired.
+ */
+ if (!PVO_IS_SP(pvo) || (pvo->pvo_vaddr & PVO_DEAD) != 0) {
+ CTR2(KTR_PMAP, "%s: demoted/removed: pa=%#jx",
+ __func__, (uintmax_t)PVO_PADDR(pvo));
+ PMAP_UNLOCK(pmap);
+ return (-1);
+ }
+
+ ret = moea64_sp_query_locked(pvo, ptebit);
+ if ((ret & ptebit) != 0)
+ count++;
+ else {
+ PMAP_UNLOCK(pmap);
+ return (0);
+ }
+
+ moea64_sp_demote(pvo);
+ moea64_pte_clear(pvo, ptebit);
+
+ /*
+ * Write protect the mapping to a single page so that a
+ * subsequent write access may repromote.
+ */
+ if ((pvo->pvo_vaddr & PVO_WIRED) == 0)
+ moea64_pvo_protect(pmap, pvo,
+ pvo->pvo_pte.prot & ~VM_PROT_WRITE);
+
+ PMAP_UNLOCK(pmap);
+ }
+
+ return (count);
+}
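
The comment in moea64_sp_clear() describes a sampling scheme: the REF bit shared by a whole 16MB mapping is only truly cleared when a pseudo-randomly selected one of its 4096 constituent 4KB pages is the one being tested, so repeated reference scans do not keep tearing the bit down. A standalone sketch of that selection hash; the shift values match this diff, the sample inputs are made up:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define HPT_SP_SHIFT	24
#define HPT_SP_PAGES	(1 << (HPT_SP_SHIFT - PAGE_SHIFT))	/* 4096 */

/* Mirrors the "hash" in moea64_sp_clear(): true for one page in 4096. */
static int
ref_clear_selected(uint64_t pa, uint64_t va, uintptr_t pmap_addr)
{
	return ((((pa >> PAGE_SHIFT) ^ (va >> HPT_SP_SHIFT) ^ pmap_addr) &
	    (HPT_SP_PAGES - 1)) == 0);
}

int
main(void)
{
	uintptr_t pmap_addr = (uintptr_t)0x12345000;	/* made up */
	uint64_t sp_pa = 0x02000000, sp_va = 0x10000000;
	int i, hits = 0;

	for (i = 0; i < HPT_SP_PAGES; i++)
		hits += ref_clear_selected(sp_pa + ((uint64_t)i << PAGE_SHIFT),
		    sp_va + ((uint64_t)i << PAGE_SHIFT), pmap_addr);
	/* Exactly one 4KB page out of the 4096 is selected. */
	printf("%d of %d pages selected\n", hits, HPT_SP_PAGES);
	return (0);
}
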
Index: head/sys/powerpc/aim/moea64_native.c
===================================================================
--- head/sys/powerpc/aim/moea64_native.c
+++ head/sys/powerpc/aim/moea64_native.c
@@ -132,11 +132,32 @@
/* POWER9 only permits a 64k partition table size. */
#define PART_SIZE 0x10000
+/* Actual page sizes (to be used with tlbie, when L=0) */
+#define AP_4K 0x00
+#define AP_16M 0x80
+
+#define LPTE_KERNEL_VSID_BIT (KERNEL_VSID_BIT << \
+ (16 - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)))
+
+/* Abbreviated Virtual Address Page - high bits */
+#define LPTE_AVA_PGNHI_MASK 0x0000000000000F80ULL
+#define LPTE_AVA_PGNHI_SHIFT 7
+
+/* Effective Address Page - low bits */
+#define EA_PAGELO_MASK 0x7ffULL
+#define EA_PAGELO_SHIFT 11
+
static bool moea64_crop_tlbie;
static bool moea64_need_lock;
+/*
+ * The tlbie instruction has two forms: an old one used by PowerISA
+ * 2.03 and prior, and a newer one used by PowerISA 2.06 and later.
+ * We need to support both.
+ */
static __inline void
-TLBIE(uint64_t vpn) {
+TLBIE(uint64_t vpn, uint64_t oldptehi)
+{
#ifndef __powerpc64__
register_t vpn_hi, vpn_lo;
register_t msr;
@@ -153,12 +174,32 @@
while (!atomic_cmpset_int(&tlbie_lock, 0, 1));
isync(); /* Flush instruction queue once lock acquired */
- if (moea64_crop_tlbie)
+ if (moea64_crop_tlbie) {
vpn &= ~(0xffffULL << 48);
+#ifdef __powerpc64__
+ if ((oldptehi & LPTE_BIG) != 0)
+ __asm __volatile("tlbie %0, 1" :: "r"(vpn) :
+ "memory");
+ else
+ __asm __volatile("tlbie %0, 0" :: "r"(vpn) :
+ "memory");
+ __asm __volatile("eieio; tlbsync; ptesync" :::
+ "memory");
+ goto done;
+#endif
+ }
}
#ifdef __powerpc64__
/*
+ * If this page has LPTE_BIG set and is from userspace, then
+ * it must be a superpage with 4KB base/16MB actual page size.
+ */
+ if ((oldptehi & LPTE_BIG) != 0 &&
+ (oldptehi & LPTE_KERNEL_VSID_BIT) == 0)
+ vpn |= AP_16M;
+
+ /*
* Explicitly clobber r0. The tlbie instruction has two forms: an old
* one used by PowerISA 2.03 and prior, and a newer one used by PowerISA
* 2.06 (maybe 2.05?) and later. We need to support both, and it just
@@ -168,7 +209,7 @@
* in the newer form is in the same position as the L(page size) bit of
* the old form, so as long as RS is 0, we're good on both sides.
*/
- __asm __volatile("li 0, 0 \n tlbie %0" :: "r"(vpn) : "r0", "memory");
+ __asm __volatile("li 0, 0 \n tlbie %0, 0" :: "r"(vpn) : "r0", "memory");
__asm __volatile("eieio; tlbsync; ptesync" ::: "memory");
#else
vpn_hi = (uint32_t)(vpn >> 32);
@@ -194,6 +235,7 @@
intr_restore(intr);
#endif
+done:
/* No barriers or special ops -- taken care of by ptesync above */
if (need_lock)
tlbie_lock = 0;
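
The key change to TLBIE() is that callers now pass the old pte_hi so the invalidation can name the actual page size: on the 64-bit non-crop path, a PTE with LPTE_BIG set whose VSID is not a kernel VSID must be a 4KB-base/16MB-actual user superpage, so AP_16M is OR'd into the RB operand, while everything else keeps the 4KB encoding; the crop-tlbie branch selects the L=1 form for big pages instead. A compact paraphrase of just that decision, with booleans standing in for the LPTE_BIG and LPTE_KERNEL_VSID_BIT tests so their bit values are not restated here:

#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>

/* Actual-page-size codes from this diff (tlbie RB operand, L=0 form). */
#define AP_4K	0x00
#define AP_16M	0x80

static uint64_t
tlbie_rb(uint64_t vpn, bool pte_is_big, bool kernel_vsid)
{
	if (pte_is_big && !kernel_vsid)
		vpn |= AP_16M;	/* user superpage: 4KB base, 16MB actual */
	return (vpn);		/* otherwise the AP field stays AP_4K */
}

int
main(void)
{
	uint64_t vpn = 0x12340000;	/* made-up VPN bits */

	printf("user 16MB:   rb=%#" PRIx64 "\n", tlbie_rb(vpn, true, false));
	printf("kernel 16MB: rb=%#" PRIx64 "\n", tlbie_rb(vpn, true, true));
	return (0);
}
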
@@ -224,6 +266,9 @@
static int64_t moea64_pte_clear_native(struct pvo_entry *, uint64_t);
static int64_t moea64_pte_replace_native(struct pvo_entry *, int);
static int64_t moea64_pte_unset_native(struct pvo_entry *);
+static int64_t moea64_pte_insert_sp_native(struct pvo_entry *);
+static int64_t moea64_pte_unset_sp_native(struct pvo_entry *);
+static int64_t moea64_pte_replace_sp_native(struct pvo_entry *);
/*
* Utility routines.
@@ -245,10 +290,13 @@
static struct moea64_funcs moea64_native_funcs = {
.pte_synch = moea64_pte_synch_native,
- .pte_clear = moea64_pte_clear_native,
- .pte_unset = moea64_pte_unset_native,
- .pte_replace = moea64_pte_replace_native,
- .pte_insert = moea64_pte_insert_native,
+ .pte_clear = moea64_pte_clear_native,
+ .pte_unset = moea64_pte_unset_native,
+ .pte_replace = moea64_pte_replace_native,
+ .pte_insert = moea64_pte_insert_native,
+ .pte_insert_sp = moea64_pte_insert_sp_native,
+ .pte_unset_sp = moea64_pte_unset_sp_native,
+ .pte_replace_sp = moea64_pte_replace_sp_native,
};
MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, oea64_mmu);
@@ -321,7 +369,7 @@
rw_runlock(&moea64_eviction_lock);
critical_enter();
- TLBIE(pvo->pvo_vpn);
+ TLBIE(pvo->pvo_vpn, properpt.pte_hi);
critical_exit();
} else {
rw_runlock(&moea64_eviction_lock);
@@ -332,22 +380,11 @@
return (ptelo & (LPTE_REF | LPTE_CHG));
}
-static int64_t
-moea64_pte_unset_native(struct pvo_entry *pvo)
+static __always_inline int64_t
+moea64_pte_unset_locked(volatile struct lpte *pt, uint64_t vpn)
{
- volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
- uint64_t ptelo, pvo_ptevpn;
+ uint64_t ptelo;
- pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);
-
- rw_rlock(&moea64_eviction_lock);
- if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
- /* Evicted */
- STAT_MOEA64(moea64_pte_overflow--);
- rw_runlock(&moea64_eviction_lock);
- return (-1);
- }
-
/*
* Invalidate the pte, briefly locking it to collect RC bits. No
* atomics needed since this is protected against eviction by the lock.
@@ -356,11 +393,10 @@
critical_enter();
pt->pte_hi = htobe64((be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED);
PTESYNC();
- TLBIE(pvo->pvo_vpn);
+ TLBIE(vpn, pt->pte_hi);
ptelo = be64toh(pt->pte_lo);
*((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */
critical_exit();
- rw_runlock(&moea64_eviction_lock);
/* Keep statistics */
STAT_MOEA64(moea64_pte_valid--);
@@ -369,6 +405,29 @@
}
static int64_t
+moea64_pte_unset_native(struct pvo_entry *pvo)
+{
+ volatile struct lpte *pt = moea64_pteg_table + pvo->pvo_pte.slot;
+ int64_t ret;
+ uint64_t pvo_ptevpn;
+
+ pvo_ptevpn = moea64_pte_vpn_from_pvo_vpn(pvo);
+
+ rw_rlock(&moea64_eviction_lock);
+
+ if ((be64toh(pt->pte_hi) & LPTE_AVPN_MASK) != pvo_ptevpn) {
+ /* Evicted */
+ STAT_MOEA64(moea64_pte_overflow--);
+ ret = -1;
+ } else
+ ret = moea64_pte_unset_locked(pt, pvo->pvo_vpn);
+
+ rw_runlock(&moea64_eviction_lock);
+
+ return (ret);
+}
+
+static int64_t
moea64_pte_replace_inval_native(struct pvo_entry *pvo,
volatile struct lpte *pt)
{
@@ -394,7 +453,7 @@
critical_enter();
pt->pte_hi = htobe64((be64toh(pt->pte_hi) & ~LPTE_VALID) | LPTE_LOCKED);
PTESYNC();
- TLBIE(pvo->pvo_vpn);
+ TLBIE(pvo->pvo_vpn, pt->pte_hi);
ptelo = be64toh(pt->pte_lo);
EIEIO();
pt->pte_lo = htobe64(properpt.pte_lo);
@@ -734,7 +793,7 @@
va |= (oldptehi & LPTE_AVPN_MASK) <<
(ADDR_API_SHFT64 - ADDR_PIDX_SHFT);
PTESYNC();
- TLBIE(va);
+ TLBIE(va, oldptehi);
STAT_MOEA64(moea64_pte_valid--);
STAT_MOEA64(moea64_pte_overflow++);
}
@@ -754,26 +813,18 @@
return (k);
}
-static int64_t
-moea64_pte_insert_native(struct pvo_entry *pvo)
+static __always_inline int64_t
+moea64_pte_insert_locked(struct pvo_entry *pvo, struct lpte *insertpt,
+ uint64_t mask)
{
- struct lpte insertpt;
uintptr_t slot;
- /* Initialize PTE */
- moea64_pte_from_pvo(pvo, &insertpt);
-
- /* Make sure further insertion is locked out during evictions */
- rw_rlock(&moea64_eviction_lock);
-
/*
* First try primary hash.
*/
- pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
- slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
- LPTE_VALID | LPTE_WIRED | LPTE_LOCKED);
+ slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot,
+ mask | LPTE_WIRED | LPTE_LOCKED);
if (slot != -1) {
- rw_runlock(&moea64_eviction_lock);
pvo->pvo_pte.slot = slot;
return (0);
}
@@ -782,50 +833,52 @@
* Now try secondary hash.
*/
pvo->pvo_vaddr ^= PVO_HID;
- insertpt.pte_hi ^= LPTE_HID;
+ insertpt->pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
- slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
- LPTE_VALID | LPTE_WIRED | LPTE_LOCKED);
+ slot = moea64_insert_to_pteg_native(insertpt, pvo->pvo_pte.slot,
+ mask | LPTE_WIRED | LPTE_LOCKED);
if (slot != -1) {
- rw_runlock(&moea64_eviction_lock);
pvo->pvo_pte.slot = slot;
return (0);
}
- /*
- * Out of luck. Find a PTE to sacrifice.
- */
+ return (-1);
+}
- /* Lock out all insertions for a bit */
- if (!rw_try_upgrade(&moea64_eviction_lock)) {
- rw_runlock(&moea64_eviction_lock);
- rw_wlock(&moea64_eviction_lock);
- }
+static int64_t
+moea64_pte_insert_native(struct pvo_entry *pvo)
+{
+ struct lpte insertpt;
+ int64_t ret;
- slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
- LPTE_WIRED | LPTE_LOCKED);
- if (slot != -1) {
- rw_wunlock(&moea64_eviction_lock);
- pvo->pvo_pte.slot = slot;
- return (0);
- }
+ /* Initialize PTE */
+ moea64_pte_from_pvo(pvo, &insertpt);
- /* Try other hash table. Now we're getting desperate... */
- pvo->pvo_vaddr ^= PVO_HID;
- insertpt.pte_hi ^= LPTE_HID;
- pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
- slot = moea64_insert_to_pteg_native(&insertpt, pvo->pvo_pte.slot,
- LPTE_WIRED | LPTE_LOCKED);
- if (slot != -1) {
+ /* Make sure further insertion is locked out during evictions */
+ rw_rlock(&moea64_eviction_lock);
+
+ pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
+ ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID);
+ if (ret == -1) {
+ /*
+ * Out of luck. Find a PTE to sacrifice.
+ */
+
+ /* Lock out all insertions for a bit */
+ if (!rw_try_upgrade(&moea64_eviction_lock)) {
+ rw_runlock(&moea64_eviction_lock);
+ rw_wlock(&moea64_eviction_lock);
+ }
+ /* Don't evict large pages */
+ ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_BIG);
rw_wunlock(&moea64_eviction_lock);
- pvo->pvo_pte.slot = slot;
- return (0);
- }
+ /* No freeable slots in either PTEG? We're hosed. */
+ if (ret == -1)
+ panic("moea64_pte_insert: overflow");
+ } else
+ rw_runlock(&moea64_eviction_lock);
- /* No freeable slots in either PTEG? We're hosed. */
- rw_wunlock(&moea64_eviction_lock);
- panic("moea64_pte_insert: overflow");
- return (-1);
+ return (0);
}
static void *
@@ -845,4 +898,135 @@
dctx->ptex = ptex_end;
return (__DEVOLATILE(struct lpte *, moea64_pteg_table) + ptex);
+}
+
+static __always_inline uint64_t
+moea64_vpn_from_pte(uint64_t ptehi, uintptr_t slot)
+{
+ uint64_t pgn, pgnlo, vsid;
+
+ vsid = (ptehi & LPTE_AVA_MASK) >> LPTE_VSID_SHIFT;
+ if ((ptehi & LPTE_HID) != 0)
+ slot ^= (moea64_pteg_mask << 3);
+ pgnlo = ((vsid & VSID_HASH_MASK) ^ (slot >> 3)) & EA_PAGELO_MASK;
+ pgn = ((ptehi & LPTE_AVA_PGNHI_MASK) << (EA_PAGELO_SHIFT -
+ LPTE_AVA_PGNHI_SHIFT)) | pgnlo;
+ return ((vsid << 16) | pgn);
+}
+
+static __always_inline int64_t
+moea64_pte_unset_sp_locked(struct pvo_entry *pvo)
+{
+ volatile struct lpte *pt;
+ uint64_t ptehi, refchg, vpn;
+ vm_offset_t eva;
+ pmap_t pm;
+
+ pm = pvo->pvo_pmap;
+ refchg = 0;
+ eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
+
+ for (; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ pt = moea64_pteg_table + pvo->pvo_pte.slot;
+ ptehi = be64toh(pt->pte_hi);
+ if ((ptehi & LPTE_AVPN_MASK) !=
+ moea64_pte_vpn_from_pvo_vpn(pvo)) {
+ /* Evicted: invalidate new entry */
+ STAT_MOEA64(moea64_pte_overflow--);
+ vpn = moea64_vpn_from_pte(ptehi, pvo->pvo_pte.slot);
+ CTR1(KTR_PMAP, "Evicted page in pte_unset_sp: vpn=%jx",
+ (uintmax_t)vpn);
+ /* Assume evicted page was modified */
+ refchg |= LPTE_CHG;
+ } else
+ vpn = pvo->pvo_vpn;
+
+ refchg |= moea64_pte_unset_locked(pt, vpn);
+ }
+
+ return (refchg);
+}
+
+static int64_t
+moea64_pte_unset_sp_native(struct pvo_entry *pvo)
+{
+ uint64_t refchg;
+
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ rw_rlock(&moea64_eviction_lock);
+ refchg = moea64_pte_unset_sp_locked(pvo);
+ rw_runlock(&moea64_eviction_lock);
+
+ return (refchg);
+}
+
+static __always_inline int64_t
+moea64_pte_insert_sp_locked(struct pvo_entry *pvo)
+{
+ struct lpte insertpt;
+ int64_t ret;
+ vm_offset_t eva;
+ pmap_t pm;
+
+ pm = pvo->pvo_pmap;
+ eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
+
+ for (; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ moea64_pte_from_pvo(pvo, &insertpt);
+ pvo->pvo_pte.slot &= ~7ULL; /* Base slot address */
+
+ ret = moea64_pte_insert_locked(pvo, &insertpt, LPTE_VALID);
+ if (ret == -1) {
+ /* Lock out all insertions for a bit */
+ if (!rw_try_upgrade(&moea64_eviction_lock)) {
+ rw_runlock(&moea64_eviction_lock);
+ rw_wlock(&moea64_eviction_lock);
+ }
+ /* Don't evict large pages */
+ ret = moea64_pte_insert_locked(pvo, &insertpt,
+ LPTE_BIG);
+ rw_downgrade(&moea64_eviction_lock);
+ /* No freeable slots in either PTEG? We're hosed. */
+ if (ret == -1)
+ panic("moea64_pte_insert_sp: overflow");
+ }
+ }
+
+ return (0);
+}
+
+static int64_t
+moea64_pte_insert_sp_native(struct pvo_entry *pvo)
+{
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ rw_rlock(&moea64_eviction_lock);
+ moea64_pte_insert_sp_locked(pvo);
+ rw_runlock(&moea64_eviction_lock);
+
+ return (0);
+}
+
+static int64_t
+moea64_pte_replace_sp_native(struct pvo_entry *pvo)
+{
+ uint64_t refchg;
+
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ rw_rlock(&moea64_eviction_lock);
+ refchg = moea64_pte_unset_sp_locked(pvo);
+ moea64_pte_insert_sp_locked(pvo);
+ rw_runlock(&moea64_eviction_lock);
+
+ return (refchg);
}
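
One design point worth spelling out: moea64_pte_insert_locked() now takes a mask of PTE bits that make an existing slot off-limits. The first attempt passes LPTE_VALID, so only empty slots are used; the eviction pass passes LPTE_BIG, so a valid 4KB entry may be sacrificed but a 16MB superpage entry never is (the phyp backend enforces the same rule by skipping LPTE_BIG candidates). A schematic of the slot predicate implied by that code; the bit values below are placeholders, only the relationship matters:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LPTE_VALID	(1ULL << 0)	/* placeholder values */
#define LPTE_BIG	(1ULL << 2)
#define LPTE_LOCKED	(1ULL << 3)
#define LPTE_WIRED	(1ULL << 4)

/*
 * A slot may be (re)used when none of the disqualifying bits are set.
 * First pass:    mask = LPTE_VALID -> only empty slots.
 * Eviction pass: mask = LPTE_BIG   -> valid 4KB entries may be evicted,
 *                                     superpage entries never are.
 */
static bool
slot_usable(uint64_t pte_hi, uint64_t mask)
{
	return ((pte_hi & (mask | LPTE_WIRED | LPTE_LOCKED)) == 0);
}

int
main(void)
{
	printf("evict 4KB entry:  %d\n", slot_usable(LPTE_VALID, LPTE_BIG));		/* 1 */
	printf("evict 16MB entry: %d\n", slot_usable(LPTE_VALID | LPTE_BIG, LPTE_BIG));	/* 0 */
	return (0);
}
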
Index: head/sys/powerpc/include/pmap.h
===================================================================
--- head/sys/powerpc/include/pmap.h
+++ head/sys/powerpc/include/pmap.h
@@ -148,8 +148,8 @@
#define PVO_MANAGED 0x020UL /* PVO entry is managed */
#define PVO_BOOTSTRAP 0x080UL /* PVO entry allocated during
bootstrap */
-#define PVO_DEAD 0x100UL /* waiting to be deleted */
-#define PVO_LARGE 0x200UL /* large page */
+#define PVO_DEAD 0x100UL /* waiting to be deleted */
+#define PVO_LARGE 0x200UL /* large page */
#define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF)
#define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK)
#define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID)
Index: head/sys/powerpc/include/pte.h
===================================================================
--- head/sys/powerpc/include/pte.h
+++ head/sys/powerpc/include/pte.h
@@ -111,6 +111,7 @@
/* High quadword: */
#define LPTE_VSID_SHIFT 12
#define LPTE_AVPN_MASK 0xFFFFFFFFFFFFFF80ULL
+#define LPTE_AVA_MASK 0x3FFFFFFFFFFFFF80ULL
#define LPTE_API 0x0000000000000F80ULL
#define LPTE_SWBITS 0x0000000000000078ULL
#define LPTE_WIRED 0x0000000000000010ULL
@@ -120,8 +121,13 @@
#define LPTE_VALID 0x0000000000000001ULL
/* Low quadword: */
+#define LP_4K_16M 0x38 /* 4KB base, 16MB actual page size */
+
#define EXTEND_PTE(x) UINT64_C(x) /* make constants 64-bit */
#define LPTE_RPGN 0xfffffffffffff000ULL
+#define LPTE_LP_MASK 0x00000000000ff000ULL
+#define LPTE_LP_SHIFT 12
+#define LPTE_LP_4K_16M ((unsigned long long)(LP_4K_16M) << LPTE_LP_SHIFT)
#define LPTE_REF EXTEND_PTE( PTE_REF )
#define LPTE_CHG EXTEND_PTE( PTE_CHG )
#define LPTE_WIMG EXTEND_PTE( PTE_WIMG )
@@ -138,6 +144,12 @@
#define LPTE_BR EXTEND_PTE( PTE_BR ) /* Both Read Only */
#define LPTE_RW LPTE_BW
#define LPTE_RO LPTE_BR
+
+/* HPT superpage definitions */
+#define HPT_SP_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
+#define HPT_SP_SIZE (1 << HPT_SP_SHIFT)
+#define HPT_SP_MASK (HPT_SP_SIZE - 1)
+#define HPT_SP_PAGES (1 << VM_LEVEL_0_ORDER)
/* POWER ISA 3.0 Radix Table Definitions */
#define RPTE_VALID 0x8000000000000000ULL
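
With VM_LEVEL_0_ORDER set to 12 during HPT bootstrap earlier in this diff, and 4KB base pages, the new constants work out to a 16MB superpage built from 4096 base pages, and LPTE_LP_4K_16M places the 0x38 penc value in the PTE's LP field. A quick arithmetic check:

#include <stdio.h>

#define PAGE_SHIFT	12
#define VM_LEVEL_0_ORDER 12			/* HPT value set at bootstrap */
#define HPT_SP_SHIFT	(VM_LEVEL_0_ORDER + PAGE_SHIFT)
#define HPT_SP_SIZE	(1UL << HPT_SP_SHIFT)
#define HPT_SP_PAGES	(1UL << VM_LEVEL_0_ORDER)
#define LP_4K_16M	0x38
#define LPTE_LP_SHIFT	12
#define LPTE_LP_4K_16M	((unsigned long long)LP_4K_16M << LPTE_LP_SHIFT)

int
main(void)
{
	printf("HPT_SP_SHIFT   = %d\n", HPT_SP_SHIFT);			/* 24 */
	printf("HPT_SP_SIZE    = %lu MB\n", HPT_SP_SIZE >> 20);		/* 16 */
	printf("HPT_SP_PAGES   = %lu\n", HPT_SP_PAGES);			/* 4096 */
	printf("LPTE_LP_4K_16M = %#llx\n", LPTE_LP_4K_16M);		/* 0x38000 */
	return (0);
}
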
Index: head/sys/powerpc/include/slb.h
===================================================================
--- head/sys/powerpc/include/slb.h
+++ head/sys/powerpc/include/slb.h
@@ -64,6 +64,14 @@
#define SLBE_ESID_MASK 0xfffffffff0000000UL /* Effective segment ID mask */
#define SLBE_ESID_SHIFT 28
+/*
+ * SLB page sizes encoding, as present in property ibm,segment-page-sizes
+ * of CPU device tree node.
+ *
+ * See LoPAPR: CPU Node Properties, section C.6.1.4.
+ */
+#define SLB_PGSZ_4K_4K 0
+
/* Virtual real-mode VSID in LPARs */
#define VSID_VRMA 0x1ffffff
Index: head/sys/powerpc/include/vmparam.h
===================================================================
--- head/sys/powerpc/include/vmparam.h
+++ head/sys/powerpc/include/vmparam.h
@@ -185,31 +185,34 @@
#define VM_NFREELIST 1
#define VM_FREELIST_DEFAULT 0
-/*
- * The largest allocation size is 4MB.
- */
#ifdef __powerpc64__
+/* The largest allocation size is 16MB. */
#define VM_NFREEORDER 13
#else
+/* The largest allocation size is 4MB. */
#define VM_NFREEORDER 11
#endif
#ifndef VM_NRESERVLEVEL
#ifdef __powerpc64__
+/* Enable superpage reservations: 1 level. */
#define VM_NRESERVLEVEL 1
#else
-/*
- * Disable superpage reservations.
- */
+/* Disable superpage reservations. */
#define VM_NRESERVLEVEL 0
#endif
#endif
-/*
- * Level 0 reservations consist of 512 pages.
- */
#ifndef VM_LEVEL_0_ORDER
-#define VM_LEVEL_0_ORDER 9
+/* Level 0 reservations consist of 512 (RPT) or 4096 (HPT) pages. */
+#define VM_LEVEL_0_ORDER vm_level_0_order
+#ifndef __ASSEMBLER__
+extern int vm_level_0_order;
+#endif
+#endif
+
+#ifndef VM_LEVEL_0_ORDER_MAX
+#define VM_LEVEL_0_ORDER_MAX 12
#endif
#ifdef __powerpc64__
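
VM_LEVEL_0_ORDER becomes a runtime value because the two powerpc64 pmaps want different reservation sizes: the default stays at 9 (512 pages, 2MB) and the HPT pmap raises vm_level_0_order to 12 (4096 pages, 16MB) during bootstrap, with VM_LEVEL_0_ORDER_MAX bounding the runtime value at 12. The resulting level-0 reservation sizes:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Reservation size in MB for a given level-0 order. */
static int
reservation_mb(int vm_level_0_order)
{
	return (1 << (vm_level_0_order + PAGE_SHIFT - 20));
}

int
main(void)
{
	printf("order  9 -> %2d MB reservations (radix/default)\n", reservation_mb(9));
	printf("order 12 -> %2d MB reservations (HPT superpages)\n", reservation_mb(12));
	return (0);
}
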
Index: head/sys/powerpc/powernv/platform_powernv.c
===================================================================
--- head/sys/powerpc/powernv/platform_powernv.c
+++ head/sys/powerpc/powernv/platform_powernv.c
@@ -141,6 +141,7 @@
phandle_t opal;
int res, len, idx;
register_t msr;
+ bool has_lp;
/* Ping OPAL again just to make sure */
opal_check();
@@ -228,6 +229,7 @@
sizeof(arr));
len /= 4;
idx = 0;
+ has_lp = false;
while (len > 0) {
shift = arr[idx];
slb_encoding = arr[idx + 1];
@@ -238,17 +240,21 @@
lp_size = arr[idx];
lp_encoding = arr[idx+1];
if (slb_encoding == SLBV_L && lp_encoding == 0)
- break;
+ has_lp = true;
+ if (slb_encoding == SLB_PGSZ_4K_4K &&
+ lp_encoding == LP_4K_16M)
+ moea64_has_lp_4k_16m = true;
+
idx += 2;
len -= 2;
nptlp--;
}
- if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
+ if (has_lp && moea64_has_lp_4k_16m)
break;
}
- if (len == 0)
+ if (!has_lp)
panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
"not supported by this system.");
Index: head/sys/powerpc/powerpc/pmap_dispatch.c
===================================================================
--- head/sys/powerpc/powerpc/pmap_dispatch.c
+++ head/sys/powerpc/powerpc/pmap_dispatch.c
@@ -77,6 +77,8 @@
caddr_t crashdumpmap;
int pmap_bootstrapped;
+/* Default level 0 reservations consist of 512 pages (2MB superpage). */
+int vm_level_0_order = 9;
#ifdef AIM
int
Index: head/sys/powerpc/pseries/mmu_phyp.c
===================================================================
--- head/sys/powerpc/pseries/mmu_phyp.c
+++ head/sys/powerpc/pseries/mmu_phyp.c
@@ -82,6 +82,9 @@
static int64_t mphyp_pte_clear(struct pvo_entry *pvo, uint64_t ptebit);
static int64_t mphyp_pte_unset(struct pvo_entry *pvo);
static int64_t mphyp_pte_insert(struct pvo_entry *pvo);
+static int64_t mphyp_pte_unset_sp(struct pvo_entry *pvo);
+static int64_t mphyp_pte_insert_sp(struct pvo_entry *pvo);
+static int64_t mphyp_pte_replace_sp(struct pvo_entry *pvo);
static struct pmap_funcs mphyp_methods = {
.install = mphyp_install,
@@ -95,6 +98,9 @@
.pte_clear = mphyp_pte_clear,
.pte_unset = mphyp_pte_unset,
.pte_insert = mphyp_pte_insert,
+ .pte_unset_sp = mphyp_pte_unset_sp,
+ .pte_insert_sp = mphyp_pte_insert_sp,
+ .pte_replace_sp = mphyp_pte_replace_sp,
};
MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, oea64_mmu);
@@ -135,6 +141,7 @@
uint64_t vsid;
phandle_t dev, node, root;
int idx, len, res;
+ bool has_lp;
rm_init(&mphyp_eviction_lock, "pte eviction");
@@ -199,6 +206,7 @@
sizeof(arr));
len /= 4;
idx = 0;
+ has_lp = false;
while (len > 0) {
shift = arr[idx];
slb_encoding = arr[idx + 1];
@@ -220,18 +228,22 @@
lp_encoding);
if (slb_encoding == SLBV_L && lp_encoding == 0)
- break;
+ has_lp = true;
+ if (slb_encoding == SLB_PGSZ_4K_4K &&
+ lp_encoding == LP_4K_16M)
+ moea64_has_lp_4k_16m = true;
+
idx += 2;
len -= 2;
nptlp--;
}
dprintf("\n");
- if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
+ if (has_lp && moea64_has_lp_4k_16m)
break;
}
- if (len > 0) {
+ if (has_lp) {
moea64_large_page_shift = shift;
moea64_large_page_size = 1ULL << lp_size;
moea64_large_page_mask = moea64_large_page_size - 1;
@@ -393,7 +405,7 @@
phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
&pt.pte_lo, &junk);
- if (pt.pte_hi & LPTE_WIRED)
+ if ((pt.pte_hi & (LPTE_WIRED | LPTE_BIG)) != 0)
continue;
/* This is a candidate, so remember it */
@@ -414,68 +426,61 @@
return (k);
}
-static int64_t
-mphyp_pte_insert(struct pvo_entry *pvo)
+static __inline int64_t
+mphyp_pte_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
{
- struct rm_priotracker track;
+ struct lpte evicted;
+ uint64_t index, junk;
int64_t result;
- struct lpte evicted, pte;
- uint64_t index, junk, lastptelo;
- PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
-
- /* Initialize PTE */
- moea64_pte_from_pvo(pvo, &pte);
- evicted.pte_hi = 0;
-
- /* Make sure further insertion is locked out during evictions */
- rm_rlock(&mphyp_eviction_lock, &track);
-
/*
* First try primary hash.
*/
pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
- result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi,
- pte.pte_lo, &index, &evicted.pte_lo, &junk);
+ result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte->pte_hi,
+ pte->pte_lo, &index, &evicted.pte_lo, &junk);
if (result == H_SUCCESS) {
- rm_runlock(&mphyp_eviction_lock, &track);
pvo->pvo_pte.slot = index;
return (0);
}
KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld "
"(ptegidx: %#zx/%#lx, PTE %#lx/%#lx", result, pvo->pvo_pte.slot,
- moea64_pteg_count, pte.pte_hi, pte.pte_lo));
+ moea64_pteg_count, pte->pte_hi, pte->pte_lo));
/*
* Next try secondary hash.
*/
pvo->pvo_vaddr ^= PVO_HID;
- pte.pte_hi ^= LPTE_HID;
+ pte->pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot,
- pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk);
+ pte->pte_hi, pte->pte_lo, &index, &evicted.pte_lo, &junk);
if (result == H_SUCCESS) {
- rm_runlock(&mphyp_eviction_lock, &track);
pvo->pvo_pte.slot = index;
return (0);
}
KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld",
result));
- /*
- * Out of luck. Find a PTE to sacrifice.
- */
+ return (-1);
+}
- /* Lock out all insertions for a bit */
- rm_runlock(&mphyp_eviction_lock, &track);
- rm_wlock(&mphyp_eviction_lock);
+static __inline int64_t
+mphyp_pte_evict_and_insert_locked(struct pvo_entry *pvo, struct lpte *pte)
+{
+ struct lpte evicted;
+ uint64_t index, junk, lastptelo;
+ int64_t result;
+
+ evicted.pte_hi = 0;
+
index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
if (index == -1L) {
/* Try other hash table? */
pvo->pvo_vaddr ^= PVO_HID;
- pte.pte_hi ^= LPTE_HID;
+ pte->pte_hi ^= LPTE_HID;
pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
}
@@ -500,18 +505,50 @@
/*
* Set the new PTE.
*/
- result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi,
- pte.pte_lo, &index, &evicted.pte_lo, &junk);
- rm_wunlock(&mphyp_eviction_lock); /* All clear */
+ result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte->pte_hi,
+ pte->pte_lo, &index, &evicted.pte_lo, &junk);
pvo->pvo_pte.slot = index;
if (result == H_SUCCESS)
return (0);
+ rm_wunlock(&mphyp_eviction_lock);
panic("Page replacement error: %ld", result);
return (result);
}
+static int64_t
+mphyp_pte_insert(struct pvo_entry *pvo)
+{
+ struct rm_priotracker track;
+ int64_t ret;
+ struct lpte pte;
+
+ PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+
+ /* Initialize PTE */
+ moea64_pte_from_pvo(pvo, &pte);
+
+ /* Make sure further insertion is locked out during evictions */
+ rm_rlock(&mphyp_eviction_lock, &track);
+
+ ret = mphyp_pte_insert_locked(pvo, &pte);
+ rm_runlock(&mphyp_eviction_lock, &track);
+
+ if (ret == -1) {
+ /*
+ * Out of luck. Find a PTE to sacrifice.
+ */
+
+ /* Lock out all insertions for a bit */
+ rm_wlock(&mphyp_eviction_lock);
+ ret = mphyp_pte_evict_and_insert_locked(pvo, &pte);
+ rm_wunlock(&mphyp_eviction_lock); /* All clear */
+ }
+
+ return (ret);
+}
+
static void *
mphyp_dump_pmap(void *ctx, void *buf, u_long *nbytes)
{
@@ -540,4 +577,92 @@
dctx->ptex = ptex;
return (buf);
+}
+
+static int64_t
+mphyp_pte_unset_sp(struct pvo_entry *pvo)
+{
+ struct lpte pte;
+ uint64_t junk, refchg;
+ int err;
+ vm_offset_t eva;
+ pmap_t pm;
+
+ pm = pvo->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ refchg = 0;
+ eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
+
+ for (; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ moea64_pte_from_pvo(pvo, &pte);
+
+ err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
+ pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
+ &junk);
+ KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
+ ("Error removing page: %d", err));
+
+ if (err == H_NOT_FOUND)
+ STAT_MOEA64(moea64_pte_overflow--);
+ refchg |= pte.pte_lo & (LPTE_REF | LPTE_CHG);
+ }
+
+ return (refchg);
+}
+
+static int64_t
+mphyp_pte_insert_sp(struct pvo_entry *pvo)
+{
+ struct rm_priotracker track;
+ int64_t ret;
+ struct lpte pte;
+ vm_offset_t eva;
+ pmap_t pm;
+
+ pm = pvo->pvo_pmap;
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+ KASSERT((PVO_VADDR(pvo) & HPT_SP_MASK) == 0,
+ ("%s: va %#jx unaligned", __func__, (uintmax_t)PVO_VADDR(pvo)));
+
+ eva = PVO_VADDR(pvo) + HPT_SP_SIZE;
+
+ /* Make sure further insertion is locked out during evictions */
+ rm_rlock(&mphyp_eviction_lock, &track);
+
+ for (; pvo != NULL && PVO_VADDR(pvo) < eva;
+ pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) {
+ /* Initialize PTE */
+ moea64_pte_from_pvo(pvo, &pte);
+
+ ret = mphyp_pte_insert_locked(pvo, &pte);
+ if (ret == -1) {
+ /*
+ * Out of luck. Find a PTE to sacrifice.
+ */
+
+ /* Lock out all insertions for a bit */
+ rm_runlock(&mphyp_eviction_lock, &track);
+ rm_wlock(&mphyp_eviction_lock);
+ mphyp_pte_evict_and_insert_locked(pvo, &pte);
+ rm_wunlock(&mphyp_eviction_lock); /* All clear */
+ rm_rlock(&mphyp_eviction_lock, &track);
+ }
+ }
+
+ rm_runlock(&mphyp_eviction_lock, &track);
+ return (0);
+}
+
+static int64_t
+mphyp_pte_replace_sp(struct pvo_entry *pvo)
+{
+ int64_t refchg;
+
+ refchg = mphyp_pte_unset_sp(pvo);
+ mphyp_pte_insert_sp(pvo);
+ return (refchg);
}
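
The refactoring above splits mphyp_pte_insert() so the superpage path can reuse the same fast/slow sequence per PVO: try the insertion under the shared eviction lock, and only when both hash buckets are full take the exclusive lock and evict a victim. A user-space sketch of that pattern follows, with pthreads standing in for the kernel rmlock API; insert_locked() and evict_and_insert_locked() are hypothetical stand-ins for mphyp_pte_insert_locked() and mphyp_pte_evict_and_insert_locked():

/* Sketch of the "try shared, escalate to exclusive on failure" pattern. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t eviction_lock = PTHREAD_RWLOCK_INITIALIZER;

static int
insert_locked(int slot_free)
{
	return (slot_free ? 0 : -1);    /* -1: both hash buckets are full */
}

static int
evict_and_insert_locked(void)
{
	return (0);                     /* a victim was chosen and replaced */
}

static int
pte_insert(int slot_free)
{
	int ret;

	/* Fast path: insertions may proceed concurrently with each other. */
	pthread_rwlock_rdlock(&eviction_lock);
	ret = insert_locked(slot_free);
	pthread_rwlock_unlock(&eviction_lock);

	if (ret == -1) {
		/* Out of luck: lock out all insertions and evict. */
		pthread_rwlock_wrlock(&eviction_lock);
		ret = evict_and_insert_locked();
		pthread_rwlock_unlock(&eviction_lock);
	}
	return (ret);
}

int
main(void)
{
	printf("fast path: %d, eviction path: %d\n",
	    pte_insert(1), pte_insert(0));
	return (0);
}
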
Index: head/sys/vm/vm_fault.c
===================================================================
--- head/sys/vm/vm_fault.c
+++ head/sys/vm/vm_fault.c
@@ -542,7 +542,8 @@
pidx += npages, m = vm_page_next(&m[npages - 1])) {
vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset;
#if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \
- __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv)
+ __ARM_ARCH >= 6) || defined(__i386__) || defined(__riscv) || \
+ defined(__powerpc64__)
psind = m->psind;
if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last ||
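
Adding __powerpc64__ to this list lets vm_fault_populate() enter a whole superpage in one fault when the page's psind allows it. A minimal sketch of the alignment portion of that check, with assumed page sizes (4 KB base, 16 MB superpage) for illustration only:

/* Sketch: a psind > 0 mapping requires the faulting VA to be aligned. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint64_t pagesizes[] = { 1UL << 12, 1UL << 24 }; /* 4K, 16M */
	uint64_t vaddr = 0x3000000;     /* 48 MB: 16 MB aligned */
	int psind = 1;

	if (psind > 0 && (vaddr & (pagesizes[psind] - 1)) != 0)
		psind = 0;              /* fall back to base pages */
	printf("mapping %#jx with psind %d (%ju KB)\n",
	    (uintmax_t)vaddr, psind, (uintmax_t)(pagesizes[psind] >> 10));
	return (0);
}
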
