Index: sys/powerpc/aim/aim_machdep.c
===================================================================
--- sys/powerpc/aim/aim_machdep.c
+++ sys/powerpc/aim/aim_machdep.c
@@ -160,15 +160,72 @@
 extern void *dsmisstrap, *dsmisssize;
 
 extern void *ap_pcpu;
 
+extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *,
+    uint32_t, register_t offset, register_t msr);
+
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+    void *mdp, uint32_t mdp_cookie);
 void aim_cpu_init(vm_offset_t toc);
 
 void
+aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
+    uint32_t mdp_cookie)
+{
+        register_t scratch;
+
+        /*
+         * If running from an FDT, make sure we are in real mode to avoid
+         * tromping on firmware page tables. Everything in the kernel assumes
+         * 1:1 mappings out of firmware, so this won't break anything not
+         * already broken. This doesn't work if there is live OF, since OF
+         * may internally use non-1:1 mappings.
+         */
+        if (ofentry == 0)
+                mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+
+#ifdef __powerpc64__
+        /*
+         * If in real mode, relocate to high memory so that the kernel
+         * can execute from the direct map.
+         */
+        if (!(mfmsr() & PSL_DR) &&
+            (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
+                __restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
+                    DMAP_BASE_ADDRESS, mfmsr());
+#endif
+
+        /* Various very early CPU fix ups */
+        switch (mfpvr() >> 16) {
+                /*
+                 * PowerPC 970 CPUs have a misfeature requested by Apple that
+                 * makes them pretend they have a 32-byte cacheline. Turn this
+                 * off before we measure the cacheline size.
+                 */
+                case IBM970:
+                case IBM970FX:
+                case IBM970MP:
+                case IBM970GX:
+                        scratch = mfspr(SPR_HID5);
+                        scratch &= ~HID5_970_DCBZ_SIZE_HI;
+                        mtspr(SPR_HID5, scratch);
+                        break;
+        #ifdef __powerpc64__
+                case IBMPOWER7:
+                case IBMPOWER7PLUS:
+                case IBMPOWER8:
+                case IBMPOWER8E:
+                        /* XXX: get from ibm,slb-size in device tree */
+                        n_slbs = 32;
+                        break;
+        #endif
+        }
+}
+
+void
 aim_cpu_init(vm_offset_t toc)
 {
         size_t trap_offset, trapsize;
         vm_offset_t trap;
-        register_t msr, scratch;
+        register_t msr;
         uint8_t *cache_check;
         int cacheline_warn;
         #ifndef __powerpc64__
@@ -199,32 +256,6 @@
          */
         psl_userstatic &= ~0x783f0000UL;
 
-        /* Various very early CPU fix ups */
-        switch (mfpvr() >> 16) {
-                /*
-                 * PowerPC 970 CPUs have a misfeature requested by Apple that
-                 * makes them pretend they have a 32-byte cacheline. Turn this
-                 * off before we measure the cacheline size.
-                 */
-                case IBM970:
-                case IBM970FX:
-                case IBM970MP:
-                case IBM970GX:
-                        scratch = mfspr(SPR_HID5);
-                        scratch &= ~HID5_970_DCBZ_SIZE_HI;
-                        mtspr(SPR_HID5, scratch);
-                        break;
-        #ifdef __powerpc64__
-                case IBMPOWER7:
-                case IBMPOWER7PLUS:
-                case IBMPOWER8:
-                case IBMPOWER8E:
-                        /* XXX: get from ibm,slb-size in device tree */
-                        n_slbs = 32;
-                        break;
-        #endif
-        }
-
         /*
          * Initialize the interrupt tables and figure out our cache line
          * size and whether or not we need the 64-bit bridge code.
Index: sys/powerpc/aim/locore64.S
===================================================================
--- sys/powerpc/aim/locore64.S
+++ sys/powerpc/aim/locore64.S
@@ -105,7 +105,6 @@
         mtsrr1  %r1
         ba      EXC_RST
 
-
 /*
  * Now start the real text section
  */
@@ -149,9 +148,12 @@
         subf    %r31,%r31,%r2   /* Subtract from real TOC base to get base */
 
         /* Set up the stack pointer */
-        ld      %r1,TOC_REF(tmpstk)(%r2)
-        addi    %r1,%r1,TMPSTKSZ-96
-        add     %r1,%r1,%r31
+        bl      1f
+        .llong  tmpstk + TMPSTKSZ - 96 - .
+1:      mflr    %r30
+        ld      %r1,0(%r30)
+        add     %r1,%r1,%r30
+        nop
 
         /* Relocate kernel */
         std     %r3,48(%r1)
@@ -189,4 +191,20 @@
         /* Unreachable */
         b       .
 
+ASENTRY_NOPROF(__restartkernel)
+        /*
+         * r3-r7: arguments to go to __start
+         * r8: offset from current kernel address to apply
+         * r9: MSR to set when (atomically) jumping to __start + r8
+         */
+        mtsrr1  %r9
+        bl      1f
+1:      mflr    %r25
+        add     %r25,%r8,%r25
+        addi    %r25,%r25,2f-1b
+        mtsrr0  %r25
+        rfid
+2:      bl      __start
+        nop
+
 #include <powerpc/aim/trap_subr64.S>
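
Note for reviewers: the relocation test added in aim_early_init() and the PC-relative stack setup in locore64.S both lean on the direct map being a fixed high-bit displacement, so a physical address and its direct-mapped virtual address differ only in the top bits. The stand-alone C sketch below shows that arithmetic; the 0xc000000000000000 constant and the phys_to_dmap()/dmap_to_phys() helpers are this sketch's own stand-ins for DMAP_BASE_ADDRESS and the kernel's PHYS_TO_DMAP/DMAP_TO_PHYS idiom, not code from this patch.

#include <assert.h>
#include <stdio.h>

/* Assumed stand-in for the kernel's DMAP_BASE_ADDRESS. */
#define SKETCH_DMAP_BASE 0xc000000000000000UL

/* Physical -> direct-map virtual: OR in the high bits. */
static unsigned long
phys_to_dmap(unsigned long pa)
{
        return (pa | SKETCH_DMAP_BASE);
}

/* Direct-map virtual -> physical: mask the high bits back off. */
static unsigned long
dmap_to_phys(unsigned long va)
{
        return (va & ~SKETCH_DMAP_BASE);
}

int
main(void)
{
        unsigned long pc = 0x102ab0UL;  /* a hypothetical real-mode PC */

        /* The round trip is exact because the offset is a pure OR/AND. */
        assert(dmap_to_phys(phys_to_dmap(pc)) == pc);

        /* The aim_early_init() test: only a low PC triggers relocation. */
        if (pc < SKETCH_DMAP_BASE)
                printf("would __restartkernel() with offset %#lx\n",
                    SKETCH_DMAP_BASE);
        return (0);
}

Because the displacement is a pure OR/AND, __restartkernel() only has to add the offset to the saved PC and flip the MSR bits with rfid; no page-table setup is needed before the jump.
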
Index: sys/powerpc/aim/mmu_oea64.c
===================================================================
--- sys/powerpc/aim/mmu_oea64.c
+++ sys/powerpc/aim/mmu_oea64.c
@@ -701,6 +701,7 @@
 {
         int i, j;
         vm_size_t physsz, hwphyssz;
+        vm_paddr_t kernelphysstart, kernelphysend;
 
 #ifndef __powerpc64__
         /* We don't have a direct map since there is no BAT */
@@ -727,6 +728,9 @@
         __syncicache((void *)EXC_ISE, 0x80);
 #endif
 
+        kernelphysstart = kernelstart & ~DMAP_BASE_ADDRESS;
+        kernelphysend = kernelend & ~DMAP_BASE_ADDRESS;
+
         /* Get physical memory regions from firmware */
         mem_regions(&pregions, &pregions_sz, &regions, &regions_sz);
         CTR0(KTR_PMAP, "moea64_bootstrap: physical memory");
@@ -764,29 +768,30 @@
                 if (phys_avail[j] < EXC_LAST)
                         phys_avail[j] += EXC_LAST;
 
-                if (kernelstart >= phys_avail[j] &&
-                    kernelstart < phys_avail[j+1]) {
-                        if (kernelend < phys_avail[j+1]) {
+                if (kernelphysstart >= phys_avail[j] &&
+                    kernelphysstart < phys_avail[j+1]) {
+                        if (kernelphysend < phys_avail[j+1]) {
                                 phys_avail[2*phys_avail_count] =
-                                    (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+                                    (kernelphysend & ~PAGE_MASK) + PAGE_SIZE;
                                 phys_avail[2*phys_avail_count + 1] =
                                     phys_avail[j+1];
                                 phys_avail_count++;
                         }
 
-                        phys_avail[j+1] = kernelstart & ~PAGE_MASK;
+                        phys_avail[j+1] = kernelphysstart & ~PAGE_MASK;
                 }
 
-                if (kernelend >= phys_avail[j] &&
-                    kernelend < phys_avail[j+1]) {
-                        if (kernelstart > phys_avail[j]) {
+                if (kernelphysend >= phys_avail[j] &&
+                    kernelphysend < phys_avail[j+1]) {
+                        if (kernelphysstart > phys_avail[j]) {
                                 phys_avail[2*phys_avail_count] =
                                     phys_avail[j];
                                 phys_avail[2*phys_avail_count + 1] =
-                                    kernelstart & ~PAGE_MASK;
+                                    kernelphysstart & ~PAGE_MASK;
                                 phys_avail_count++;
                         }
 
-                        phys_avail[j] = (kernelend & ~PAGE_MASK) + PAGE_SIZE;
+                        phys_avail[j] = (kernelphysend & ~PAGE_MASK) +
+                            PAGE_SIZE;
                 }
         }
Index: sys/powerpc/ofw/ofwcall64.S
===================================================================
--- sys/powerpc/ofw/ofwcall64.S
+++ sys/powerpc/ofw/ofwcall64.S
@@ -42,7 +42,7 @@
 ofwstk:
         .space  OFWSTKSZ
 rtas_regsave:
-        .space  24              /* 3 * sizeof(register_t) */
+        .space  32              /* 4 * sizeof(register_t) */
 GLOBAL(ofmsr)
         .llong  0, 0, 0, 0, 0   /* msr/sprg0-3 used in Open Firmware */
 GLOBAL(rtasmsr)
@@ -64,8 +64,8 @@
  */
 
 ASENTRY_NOPROF(ofwcall)
-        mflr    %r0
-        std     %r0,16(%r1)
+        mflr    %r8
+        std     %r8,16(%r1)
         stdu    %r1,-208(%r1)
 
         /*
@@ -106,7 +106,7 @@
 
         /* Get OF stack pointer */
         ld      %r7,TOC_REF(ofwstk)(%r2)
-        addi    %r7,%r7,OFWSTKSZ-32
+        addi    %r7,%r7,OFWSTKSZ-40
 
         /*
          * Set the MSR to the OF value. This has the side effect of disabling
@@ -129,6 +129,8 @@
         std     %r5,8(%r1)      /* Save real stack pointer */
         std     %r2,16(%r1)     /* Save old TOC */
         std     %r6,24(%r1)     /* Save old MSR */
+        std     %r8,32(%r1)     /* Save high 32-bits of the kernel's PC */
+
         li      %r5,0
         stw     %r5,4(%r1)
         stw     %r5,0(%r1)
@@ -137,15 +139,23 @@
         mtctr   %r4
         bctrl
 
-        /* Reload stack pointer and MSR from the OFW stack */
+        /* Reload stack pointer, MSR, and reference PC from the OFW stack */
+        ld      %r7,32(%r1)
         ld      %r6,24(%r1)
         ld      %r2,16(%r1)
         ld      %r1,8(%r1)
 
-        /* Now set the real MSR */
-        mtmsrd  %r6
-        isync
+        /* Get back to the MSR/PC we want, using the cached high bits of PC */
+        mtsrr1  %r6
+        clrrdi  %r7,%r7,32
+        bl      1f
+1:      mflr    %r8
+        or      %r8,%r8,%r7
+        addi    %r8,%r8,2f-1b
+        mtsrr0  %r8
+        rfid                    /* Turn on MMU, exceptions, and 64-bit mode */
+2:
 
         /* Sign-extend the return value from OF */
         extsw   %r3,%r3
@@ -186,8 +196,8 @@
  */
 
 ASENTRY_NOPROF(rtascall)
-        mflr    %r0
-        std     %r0,16(%r1)
+        mflr    %r9
+        std     %r9,16(%r1)
         stdu    %r1,-208(%r1)
 
         /*
@@ -248,6 +258,7 @@
         std     %r7,0(%r1)      /* Save 64-bit stack pointer */
         std     %r2,8(%r1)      /* Save TOC */
         std     %r6,16(%r1)     /* Save MSR */
+        std     %r9,24(%r1)     /* Save reference PC for high 32 bits */
 
         /* Finally, branch to RTAS */
         mtctr   %r5
@@ -254,18 +265,34 @@
         bctrl
 
         /*
-         * Reload stack pointer and MSR from the reg save area in r1. We are
-         * running in 32-bit mode at this point, so it doesn't matter if r1
+         * Reload stack pointer, MSR, reg PC from the reg save area in r1. We
+         * are running in 32-bit mode at this point, so it doesn't matter if r1
          * has become sign-extended.
         */
+        ld      %r7,24(%r1)
         ld      %r6,16(%r1)
         ld      %r2,8(%r1)
         ld      %r1,0(%r1)
 
-        /* Now set the real MSR */
-        mtmsrd  %r6
-        isync
+        /*
+         * Get back to the right PC. We need to atomically re-enable
+         * exceptions, 64-bit mode, and the MMU. One thing that has likely
+         * happened is that, if we were running in the high-memory direct
+         * map, we no longer are as a result of LR truncation in RTAS.
+         * Fix this by copying the high-order bits of the LR at function
+         * entry onto the current PC and then jumping there while flipping
+         * all the MSR bits.
+         */
+        mtsrr1  %r6
+        clrrdi  %r7,%r7,32
+        bl      1f
+1:      mflr    %r8
+        or      %r8,%r8,%r7
+        addi    %r8,%r8,2f-1b
+        mtsrr0  %r8
+        rfid                    /* Turn on MMU, exceptions, and 64-bit mode */
+2:
 
         /* Sign-extend the return value from RTAS */
         extsw   %r3,%r3
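
The mmu_oea64.c change above is mostly mechanical renaming, but the carving logic it touches is easy to misread: the kernel's physical footprint is punched out of a phys_avail[] start/end pair, possibly splitting that pair in two. Below is a simplified stand-alone sketch of the split case; the region contents, PAGE_SIZE value, and the carve() helper are assumptions of the sketch, and it omits the hunk's second branch that handles a kernel straddling a region boundary.

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (PAGE_SIZE - 1)

/* One assumed 256 MB region; phys_avail holds [start, end) pairs. */
static unsigned long phys_avail[8] = { 0x0, 0x10000000 };
static int phys_avail_count = 1;

/*
 * Punch the kernel's physical range out of whichever pair contains its
 * start, appending the surviving tail as a new pair, as the first
 * branch of the hunk above does.
 */
static void
carve(unsigned long kstart, unsigned long kend)
{
        int j;

        for (j = 0; j < 2 * phys_avail_count; j += 2) {
                if (kstart >= phys_avail[j] && kstart < phys_avail[j + 1]) {
                        if (kend < phys_avail[j + 1]) {
                                /* Tail survives: append it as a new pair. */
                                phys_avail[2 * phys_avail_count] =
                                    (kend & ~PAGE_MASK) + PAGE_SIZE;
                                phys_avail[2 * phys_avail_count + 1] =
                                    phys_avail[j + 1];
                                phys_avail_count++;
                        }
                        /* Truncate the original pair before the kernel. */
                        phys_avail[j + 1] = kstart & ~PAGE_MASK;
                }
        }
}

int
main(void)
{
        int j;

        carve(0x100000UL, 0x900000UL);  /* assumed kernel footprint */
        for (j = 0; j < 2 * phys_avail_count; j += 2)
                printf("[%#lx, %#lx)\n", phys_avail[j], phys_avail[j + 1]);
        return (0);
}

Running this on a kernel spanning [0x100000, 0x900000] leaves [0, 0x100000) and [0x901000, 0x10000000), which is what the hunk computes once kernelstart/kernelend are first masked down to kernelphysstart/kernelphysend.
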
Index: sys/powerpc/powerpc/machdep.c
===================================================================
--- sys/powerpc/powerpc/machdep.c
+++ sys/powerpc/powerpc/machdep.c
@@ -237,6 +237,8 @@
 extern unsigned char __sbss_end[];
 extern unsigned char _end[];
 
+void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
+    void *mdp, uint32_t mdp_cookie);
 void aim_cpu_init(vm_offset_t toc);
 void booke_cpu_init(void);
 
@@ -247,7 +249,6 @@
         struct pcpu *pc;
         struct cpuref bsp;
         vm_offset_t startkernel, endkernel;
-        void *kmdp;
         char *env;
         bool ofw_bootargs = false;
 #ifdef DDB
@@ -255,8 +256,6 @@
         vm_offset_t ksym_end;
 #endif
 
-        kmdp = NULL;
-
         /* First guess at start/end kernel positions */
         startkernel = __startkernel;
         endkernel = __endkernel;
@@ -278,15 +277,7 @@
 #endif
 
 #ifdef AIM
-        /*
-         * If running from an FDT, make sure we are in real mode to avoid
-         * tromping on firmware page tables. Everything in the kernel assumes
-         * 1:1 mappings out of firmware, so this won't break anything not
-         * already broken. This doesn't work if there is live OF, since OF
-         * may internally use non-1:1 mappings.
-         */
-        if (ofentry == 0)
-                mtmsr(mfmsr() & ~(PSL_IR | PSL_DR));
+        aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
 #endif
 
         /*
@@ -295,14 +286,33 @@
          * boothowto.
          */
         if (mdp != NULL) {
+                void *kmdp = NULL;
+                char *envp = NULL;
+                uintptr_t md_offset = 0;
+                vm_paddr_t kernelendphys;
+
+#ifdef AIM
+                if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
+                        md_offset = DMAP_BASE_ADDRESS;
+#endif
+
                 preload_metadata = mdp;
+                if (md_offset > 0) {
+                        preload_metadata += md_offset;
+                        preload_bootstrap_relocate(md_offset);
+                }
                 kmdp = preload_search_by_type("elf kernel");
                 if (kmdp != NULL) {
                         boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int);
-                        init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *),
-                            0);
-                        endkernel = ulmax(endkernel, MD_FETCH(kmdp,
-                            MODINFOMD_KERNEND, vm_offset_t));
+                        envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *);
+                        if (envp != NULL)
+                                envp += md_offset;
+                        init_static_kenv(envp, 0);
+                        kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
+                            vm_offset_t);
+                        if (kernelendphys != 0)
+                                kernelendphys += md_offset;
+                        endkernel = ulmax(endkernel, kernelendphys);
 #ifdef DDB
                         ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
                         ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
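
A closing note on the machdep.c hunk: every pointer fished out of the loader metadata is a physical address, so once the kernel runs from the direct map each one must be displaced by the same md_offset before being dereferenced, while NULL/zero sentinels must pass through untouched. A minimal C sketch of that rule follows; the DMAP_BASE_ADDRESS value and the relocate_md_pointer() helper are the sketch's own assumptions, not kernel API.

#include <stdio.h>

/* Assumed stand-in for the kernel's DMAP_BASE_ADDRESS. */
#define DMAP_BASE_ADDRESS 0xc000000000000000UL

/*
 * Mirror of the hunk's rule: if the kernel's own code already runs
 * above the direct-map base, displace loader-provided physical
 * pointers by that base, but never touch a NULL/zero sentinel.
 */
static unsigned long
relocate_md_pointer(unsigned long loader_ptr, unsigned long kernel_pc)
{
        unsigned long md_offset = 0;

        if (kernel_pc > DMAP_BASE_ADDRESS)
                md_offset = DMAP_BASE_ADDRESS;
        if (loader_ptr != 0)
                loader_ptr += md_offset;
        return (loader_ptr);
}

int
main(void)
{
        unsigned long pc = DMAP_BASE_ADDRESS + 0x100000; /* assumed PC */

        /* A real envp pointer moves; a zero sentinel stays zero. */
        printf("%#lx\n", relocate_md_pointer(0x200000UL, pc));
        printf("%#lx\n", relocate_md_pointer(0UL, pc));
        return (0);
}

This is the same reasoning behind preload_bootstrap_relocate(md_offset) in the hunk: the metadata blob itself, and every pointer inside it, move together by one constant displacement.
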