Changeset View
Standalone View
sys/powerpc/aim/moea64_native.c
Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | |||||
#define SYNC() __asm __volatile("sync"); | #define SYNC() __asm __volatile("sync"); | ||||
#define EIEIO() __asm __volatile("eieio"); | #define EIEIO() __asm __volatile("eieio"); | ||||
#define VSID_HASH_MASK 0x0000007fffffffffULL | #define VSID_HASH_MASK 0x0000007fffffffffULL | ||||
/* POWER9 only permits a 64k partition table size. */ | /* POWER9 only permits a 64k partition table size. */ | ||||
#define PART_SIZE 0x10000 | #define PART_SIZE 0x10000 | ||||
/* Actual page sizes (to be used with tlbie, when L=0) */ | |||||
#define AP_4K 0x00 | |||||
#define AP_16M 0x80 | |||||
#define LPTE_KERNEL_VSID_BIT (KERNEL_VSID_BIT << \ | |||||
(16 - (ADDR_API_SHFT64 - ADDR_PIDX_SHFT))) | |||||
static bool moea64_crop_tlbie; | static bool moea64_crop_tlbie; | ||||
static bool moea64_need_lock; | static bool moea64_need_lock; | ||||
/* | |||||
jhibbits: This may need to change in the future, to support DRI. We may need to demote to change cache… | |||||
Done Inline ActionsOk, but this would not be a trivial change. luporl: Ok, but this would not be a trivial change.
AFAIK, there is no easy way to tell if a PTE… | |||||
Done Inline ActionsNow using LPTE_KERNEL_VSID_BIT to check if page belongs to kernel or userspace. luporl: Now using LPTE_KERNEL_VSID_BIT to check if page belongs to kernel or userspace. | |||||
Done Inline ActionsI think 'old' is only needed when 'crop' is needed; otherwise, it's not used. So putting the tlb_old into the 'if (moea64_crop_tlbie)' block, with a goto, and putting the tlbie_new() as the rest of the body, should work, and might knock off a part of the hit, because you'll be removing one level of indirection. jhibbits: I think 'old' is only needed when 'crop' is needed; otherwise, it's not used. So putting the… | |||||
Done Inline ActionsOk. This change worked fine on Talos, but I didn't notice any change in performance. luporl: Ok. This change worked fine on Talos, but I didn't notice any change in performance. | |||||
* The tlbie instruction has two forms: an old one used by PowerISA | |||||
* 2.03 and prior, and a newer one used by PowerISA 2.06 and later. | |||||
* We need to support both. | |||||
*/ | |||||
static __inline void | static __inline void | ||||
TLBIE(uint64_t vpn) { | TLBIE(uint64_t vpn, uint64_t oldptehi) | ||||
{ | |||||
#ifndef __powerpc64__ | #ifndef __powerpc64__ | ||||
register_t vpn_hi, vpn_lo; | register_t vpn_hi, vpn_lo; | ||||
register_t msr; | register_t msr; | ||||
register_t scratch, intr; | register_t scratch, intr; | ||||
#endif | #endif | ||||
static volatile u_int tlbie_lock = 0; | static volatile u_int tlbie_lock = 0; | ||||
bool need_lock = moea64_need_lock; | bool need_lock = moea64_need_lock; | ||||
vpn <<= ADDR_PIDX_SHFT; | vpn <<= ADDR_PIDX_SHFT; | ||||
/* Hobo spinlock: we need stronger guarantees than mutexes provide */ | /* Hobo spinlock: we need stronger guarantees than mutexes provide */ | ||||
if (need_lock) { | if (need_lock) { | ||||
while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); | while (!atomic_cmpset_int(&tlbie_lock, 0, 1)); | ||||
isync(); /* Flush instruction queue once lock acquired */ | isync(); /* Flush instruction queue once lock acquired */ | ||||
if (moea64_crop_tlbie) | if (moea64_crop_tlbie) { | ||||
vpn &= ~(0xffffULL << 48); | vpn &= ~(0xffffULL << 48); | ||||
#ifdef __powerpc64__ | |||||
if ((oldptehi & LPTE_BIG) != 0) | |||||
__asm __volatile("tlbie %0, 1" :: "r"(vpn) : | |||||
"memory"); | |||||
else | |||||
__asm __volatile("tlbie %0, 0" :: "r"(vpn) : | |||||
"memory"); | |||||
__asm __volatile("eieio; tlbsync; ptesync" ::: | |||||
"memory"); | |||||
goto done; | |||||
#endif | |||||
} | } | ||||
} | |||||
#ifdef __powerpc64__ | #ifdef __powerpc64__ | ||||
/* | /* | ||||
* Explicitly clobber r0. The tlbie instruction has two forms: an old | * If this page has LPTE_BIG set and is from userspace, then | ||||
Done Inline ActionsCan you keep this comment in, to note why this silly mess is here? jhibbits: Can you keep this comment in, to note why this silly mess is here? | |||||
Done Inline ActionsSure. luporl: Sure. | |||||
* one used by PowerISA 2.03 and prior, and a newer one used by PowerISA | * it must be a superpage with 4KB base/16MB actual page size. | ||||
* 2.06 (maybe 2.05?) and later. We need to support both, and it just | |||||
* so happens that since we use 4k pages we can simply zero out r0, and | |||||
* clobber it, and the assembler will interpret the single-operand form | |||||
* of tlbie as having RB set, and everything else as 0. The RS operand | |||||
* in the newer form is in the same position as the L(page size) bit of | |||||
* the old form, so as long as RS is 0, we're good on both sides. | |||||
*/ | */ | ||||
__asm __volatile("li 0, 0 \n tlbie %0" :: "r"(vpn) : "r0", "memory"); | if ((oldptehi & LPTE_BIG) != 0 && | ||||
(oldptehi & LPTE_KERNEL_VSID_BIT) == 0) | |||||
vpn |= AP_16M; | |||||
__asm __volatile("li 0, 0 \n tlbie %0, 0" :: "r"(vpn) : "r0", "memory"); | |||||
Done Inline ActionsThe PPC970 supports superpages, as does the POWER4. Now, we probably don't care much about the POWER4, but the PPC970 can benefit from superpages, but as mentioned in the comment right above, it uses a different tlbie instruction format. Can that be worked into this? jhibbits: The PPC970 supports superpages, as does the POWER4. Now, we probably don't care much about the… | |||||
Done Inline ActionsWill this also work on ISA 2.03 and prior? We may want to just do a 'if running on old, use old tlbie, otherwise use new' (or use asm routines to do it all). jhibbits: Will this also work on ISA 2.03 and prior? We may want to just do a 'if running on old, use… | |||||
Done Inline ActionsBy taking a look at PowerISA 2.03 spec, it initially looks like this wouldn't work on it, but in the end it's ok. On 2.03, AP is a 1-bit flag telling if the page to be invalidated is 4K or 64K. luporl: By taking a look at PowerISA 2.03 spec, it initially looks like this wouldn't work on it, but… | |||||
Done Inline ActionsOld tlbie instruction format is now on __tlbie_old and new one on __tlbie_new. luporl: Old tlbie instruction format is now on `__tlbie_old` and new one on `__tlbie_new`.
Both support… | |||||
__asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); | __asm __volatile("eieio; tlbsync; ptesync" ::: "memory"); | ||||
#else | #else | ||||
vpn_hi = (uint32_t)(vpn >> 32); | vpn_hi = (uint32_t)(vpn >> 32); | ||||
vpn_lo = (uint32_t)vpn; | vpn_lo = (uint32_t)vpn; | ||||
intr = intr_disable(); | intr = intr_disable(); | ||||
__asm __volatile("\ | __asm __volatile("\ | ||||
mfmsr %0; \ | mfmsr %0; \ | ||||
Show All 9 Lines | __asm __volatile("\ | ||||
eieio; \ | eieio; \ | ||||
tlbsync; \ | tlbsync; \ | ||||
ptesync;" | ptesync;" | ||||
: "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) | : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) | ||||
: "memory"); | : "memory"); | ||||
intr_restore(intr); | intr_restore(intr); | ||||
#endif | #endif | ||||
done: | |||||
/* No barriers or special ops -- taken care of by ptesync above */ | /* No barriers or special ops -- taken care of by ptesync above */ | ||||
if (need_lock) | if (need_lock) | ||||
tlbie_lock = 0; | tlbie_lock = 0; | ||||
} | } | ||||
#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) | #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR) | ||||
#define ENABLE_TRANS(msr) mtmsr(msr) | #define ENABLE_TRANS(msr) mtmsr(msr) | ||||
▲ Show 20 Lines • Show All 111 Lines • ▼ Show 20 Lines | |||||
#if BYTE_ORDER == BIG_ENDIAN | #if BYTE_ORDER == BIG_ENDIAN | ||||
((uint8_t *)(&properpt.pte_lo))[6]; | ((uint8_t *)(&properpt.pte_lo))[6]; | ||||
#else | #else | ||||
((uint8_t *)(&properpt.pte_lo))[1]; | ((uint8_t *)(&properpt.pte_lo))[1]; | ||||
#endif | #endif | ||||
rw_runlock(&moea64_eviction_lock); | rw_runlock(&moea64_eviction_lock); | ||||
critical_enter(); | critical_enter(); | ||||
TLBIE(pvo->pvo_vpn); | TLBIE(pvo->pvo_vpn, properpt.pte_hi); | ||||
critical_exit(); | critical_exit(); | ||||
} else { | } else { | ||||
rw_runlock(&moea64_eviction_lock); | rw_runlock(&moea64_eviction_lock); | ||||
ptelo = moea64_pte_unset_native(pvo); | ptelo = moea64_pte_unset_native(pvo); | ||||
moea64_pte_insert_native(pvo); | moea64_pte_insert_native(pvo); | ||||
} | } | ||||
return (ptelo & (LPTE_REF | LPTE_CHG)); | return (ptelo & (LPTE_REF | LPTE_CHG)); | ||||
Show All 18 Lines | moea64_pte_unset_native(struct pvo_entry *pvo) | ||||
/* | /* | ||||
* Invalidate the pte, briefly locking it to collect RC bits. No | * Invalidate the pte, briefly locking it to collect RC bits. No | ||||
* atomics needed since this is protected against eviction by the lock. | * atomics needed since this is protected against eviction by the lock. | ||||
*/ | */ | ||||
isync(); | isync(); | ||||
critical_enter(); | critical_enter(); | ||||
pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); | pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); | ||||
PTESYNC(); | PTESYNC(); | ||||
TLBIE(pvo->pvo_vpn); | TLBIE(pvo->pvo_vpn, pt->pte_hi); | ||||
ptelo = be64toh(pt->pte_lo); | ptelo = be64toh(pt->pte_lo); | ||||
*((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */ | *((volatile int32_t *)(&pt->pte_hi) + 1) = 0; /* Release lock */ | ||||
critical_exit(); | critical_exit(); | ||||
rw_runlock(&moea64_eviction_lock); | rw_runlock(&moea64_eviction_lock); | ||||
/* Keep statistics */ | /* Keep statistics */ | ||||
STAT_MOEA64(moea64_pte_valid--); | STAT_MOEA64(moea64_pte_valid--); | ||||
Show All 21 Lines | moea64_pte_replace_inval_native(struct pvo_entry *pvo, | ||||
/* | /* | ||||
* Replace the pte, briefly locking it to collect RC bits. No | * Replace the pte, briefly locking it to collect RC bits. No | ||||
* atomics needed since this is protected against eviction by the lock. | * atomics needed since this is protected against eviction by the lock. | ||||
*/ | */ | ||||
isync(); | isync(); | ||||
critical_enter(); | critical_enter(); | ||||
pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); | pt->pte_hi = be64toh((pt->pte_hi & ~LPTE_VALID) | LPTE_LOCKED); | ||||
PTESYNC(); | PTESYNC(); | ||||
TLBIE(pvo->pvo_vpn); | TLBIE(pvo->pvo_vpn, pt->pte_hi); | ||||
ptelo = be64toh(pt->pte_lo); | ptelo = be64toh(pt->pte_lo); | ||||
EIEIO(); | EIEIO(); | ||||
pt->pte_lo = htobe64(properpt.pte_lo); | pt->pte_lo = htobe64(properpt.pte_lo); | ||||
EIEIO(); | EIEIO(); | ||||
pt->pte_hi = htobe64(properpt.pte_hi); /* Release lock */ | pt->pte_hi = htobe64(properpt.pte_hi); /* Release lock */ | ||||
PTESYNC(); | PTESYNC(); | ||||
critical_exit(); | critical_exit(); | ||||
rw_runlock(&moea64_eviction_lock); | rw_runlock(&moea64_eviction_lock); | ||||
▲ Show 20 Lines • Show All 291 Lines • ▼ Show 20 Lines | if (oldptehi & LPTE_VALID) { | ||||
if (oldptehi & LPTE_HID) | if (oldptehi & LPTE_HID) | ||||
va = (((k >> 3) ^ moea64_pteg_mask) ^ va) & | va = (((k >> 3) ^ moea64_pteg_mask) ^ va) & | ||||
(ADDR_PIDX >> ADDR_PIDX_SHFT); | (ADDR_PIDX >> ADDR_PIDX_SHFT); | ||||
else | else | ||||
va = ((k >> 3) ^ va) & (ADDR_PIDX >> ADDR_PIDX_SHFT); | va = ((k >> 3) ^ va) & (ADDR_PIDX >> ADDR_PIDX_SHFT); | ||||
va |= (oldptehi & LPTE_AVPN_MASK) << | va |= (oldptehi & LPTE_AVPN_MASK) << | ||||
(ADDR_API_SHFT64 - ADDR_PIDX_SHFT); | (ADDR_API_SHFT64 - ADDR_PIDX_SHFT); | ||||
PTESYNC(); | PTESYNC(); | ||||
TLBIE(va); | TLBIE(va, oldptehi); | ||||
STAT_MOEA64(moea64_pte_valid--); | STAT_MOEA64(moea64_pte_valid--); | ||||
STAT_MOEA64(moea64_pte_overflow++); | STAT_MOEA64(moea64_pte_overflow++); | ||||
} | } | ||||
/* | /* | ||||
* Update the PTE as per "Adding a Page Table Entry". Lock is released | * Update the PTE as per "Adding a Page Table Entry". Lock is released | ||||
* by setting the high doubleword. | * by setting the high doubleword. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines |
This may need to change in the future, to support DRI. We may need to demote to change cache characteristics for an individual DMAP page.