Index: sys/conf/kern.pre.mk
===================================================================
--- sys/conf/kern.pre.mk
+++ sys/conf/kern.pre.mk
@@ -157,7 +157,7 @@
 .endif
 
 .if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \
-    ${MACHINE_CPUARCH} == "i386") && \
+    ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "powerpc64") && \
     defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mifunc} == ""
-.error amd64/arm64/i386 kernel requires linker ifunc support
+.error amd64/arm64/i386/powerpc64 kernel requires linker ifunc support
 .endif
Index: sys/kern/link_elf.c
===================================================================
--- sys/kern/link_elf.c
+++ sys/kern/link_elf.c
@@ -387,7 +387,9 @@
 	return (0);
 }
 
+#ifdef RELOCATABLE_KERNEL
 extern vm_offset_t __startkernel, __endkernel;
+#endif
 
 static void
 link_elf_init(void* arg)
@@ -416,7 +418,7 @@
 	ef = (elf_file_t) linker_kernel_file;
 	ef->preloaded = 1;
-#ifdef __powerpc__
+#ifdef RELOCATABLE_KERNEL
 	ef->address = (caddr_t) (__startkernel - KERNBASE);
 #else
 	ef->address = 0;
 #endif
@@ -428,7 +430,7 @@
 	if (dp != NULL)
 		parse_dynamic(ef);
-#ifdef __powerpc__
+#ifdef RELOCATABLE_KERNEL
 	linker_kernel_file->address = (caddr_t)__startkernel;
 	linker_kernel_file->size = (intptr_t)(__endkernel - __startkernel);
 #else
@@ -1852,7 +1854,8 @@
 	return (ef->ddbstrcnt);
 }
 
-#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) || \
+    defined(__powerpc64__)
 /*
  * Use this lookup routine when performing relocations early during boot.
  * The generic lookup routine depends on kobj, which is not initialized
@@ -1888,8 +1890,14 @@
 	ef->modptr = kmdp;
 	ef->dynamic = (Elf_Dyn *)&_DYNAMIC;
+
+#ifdef RELOCATABLE_KERNEL
+	ef->address = (caddr_t) (__startkernel - KERNBASE);
+#else
+	ef->address = 0;
+#endif
 	parse_dynamic(ef);
-	ef->address = 0;
+
 	link_elf_preload_parse_symbols(ef);
 	relocate_file1(ef, elf_lookup_ifunc, elf_reloc, true);
 }
Index: sys/kern/link_elf_obj.c
===================================================================
--- sys/kern/link_elf_obj.c
+++ sys/kern/link_elf_obj.c
@@ -625,7 +625,7 @@
 	if (error)
 		return (error);
 
-#if defined(__i386__) || defined(__amd64__)
+#if defined(__i386__) || defined(__amd64__) || defined(__powerpc64__)
 	/* Now ifuncs. */
 	error = link_elf_reloc_local(lf, true);
 	if (error != 0)
@@ -1151,7 +1151,7 @@
 	if (error)
 		goto out;
 
-#if defined(__i386__) || defined(__amd64__)
+#if defined(__i386__) || defined(__amd64__) || defined(__powerpc64__)
 	/* Now ifuncs. */
 	error = link_elf_reloc_local(lf, true);
 	if (error != 0)
Index: sys/powerpc/aim/aim_machdep.c
===================================================================
--- sys/powerpc/aim/aim_machdep.c
+++ sys/powerpc/aim/aim_machdep.c
@@ -161,6 +161,7 @@
 extern void *ap_pcpu;
 extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);
+extern void __restartkernel_virtual(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);
 
 void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry,
     void *mdp, uint32_t mdp_cookie);
@@ -184,13 +185,26 @@
 #ifdef __powerpc64__
 	/*
-	 * If in real mode, relocate to high memory so that the kernel
+	 * Relocate to high memory so that the kernel
 	 * can execute from the direct map.
+	 *
+	 * If we are in virtual mode already, use a special entry point
+	 * that sets up a temporary DMAP to execute from until we can
+	 * properly set up the MMU.
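+	 *
+	 * __restartkernel_virtual() builds that temporary mapping by
+	 * shadowing the first 16 SLB entries into DMAP space; see
+	 * locore64.S for details.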
 	 */
-	if (!(mfmsr() & PSL_DR) &&
-	    (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
-		__restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
-		    DMAP_BASE_ADDRESS, mfmsr());
+	if ((vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS) {
+		if (mfmsr() & PSL_DR) {
+			__restartkernel_virtual(fdt, 0, ofentry, mdp,
+			    mdp_cookie, DMAP_BASE_ADDRESS, mfmsr());
+		} else {
+			__restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
+			    DMAP_BASE_ADDRESS, mfmsr());
+		}
+	}
 #endif
 
 	/* Various very early CPU fix ups */
 	switch (mfpvr() >> 16) {
Index: sys/powerpc/aim/locore64.S
===================================================================
--- sys/powerpc/aim/locore64.S
+++ sys/powerpc/aim/locore64.S
@@ -200,6 +200,93 @@
 	/* Unreachable */
 	b .
 
+ASENTRY_NOPROF(__restartkernel_virtual)
+	/*
+	 * When coming in via this entry point, we need to alter the SLB to
+	 * shadow the segment register emulation entries in DMAP space.
+	 * We need to do this dance because we are running with virtual-mode
+	 * OpenFirmware and have not yet taken over the MMU.
+	 *
+	 * Assumptions:
+	 * 1) The kernel is currently identity-mapped.
+	 * 2) We are currently executing at an address compatible with
+	 *    real mode.
+	 * 3) The first 16 SLB entries are emulating SRs.
+	 * 4) The rest of the SLB is not in use.
+	 * 5) OpenFirmware is not manipulating the SLB at runtime.
+	 * 6) We are running on 64-bit AIM.
+	 *
+	 * Tested on a G5.
+	 */
+	mfmsr	%r14
+	/* Switch to real mode because we are about to mess with the SLB. */
+	andi.	%r14, %r14, ~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l
+	mtmsr	%r14
+	isync
+	/* Prepare variables for later use. */
+	li	%r14, 0
+	li	%r18, 0
+	oris	%r18, %r18, 0xc000
+	sldi	%r18, %r18, 32		/* r18: 0xc000000000000000 */
+1:
+	/*
+	 * Loop over the first 16 SLB entries.
+	 * Offset the SLBE into the DMAP, add 16 to the index, and write
+	 * it back to the SLB.
+	 */
+	/* XXX add more safety checks */
+	slbmfev	%r15, %r14
+	slbmfee	%r16, %r14
+	or	%r16, %r16, %r14	/* index is 0-15 */
+	ori	%r16, %r16, 0x10	/* add 16 to index. */
+	or	%r16, %r16, %r18	/* SLBE DMAP offset */
+	rldicr	%r17, %r16, 0, 37	/* Invalidation SLBE */
+
+	isync
+	slbie	%r17
+	/* isync */
+	slbmte	%r15, %r16
+	isync
+	addi	%r14, %r14, 1
+	cmpdi	%r14, 16
+	blt	1b
+
+#if 0
+	/*
+	 * Hack: Install segment trap handlers, since Open Firmware
+	 * does not install them for us.
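+	 *
+	 * Each word stored at 0x380/0x480 (the data and instruction
+	 * segment exception vectors) is a big-endian instruction
+	 * encoding; the dcbf/icbi sequence below flushes the patched
+	 * words so that instruction fetch sees them.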
+	 */
+	li	%r14, 0x43a6
+	oris	%r14, %r14, 0x7c10
+	stw	%r14, 0x380(0)
+	stw	%r14, 0x480(0)
+	li	%r14, 0x02a6
+	oris	%r14, %r14, 0x7c08
+	stw	%r14, 0x384(0)
+	stw	%r14, 0x484(0)
+	li	%r14, 0x4386
+	oris	%r14, %r14, 0x7c31
+	stw	%r14, 0x388(0)
+	stw	%r14, 0x488(0)
+	li	%r14, 0x01e3		/* Apple OF trap trampoline @ 0x01e3 */
+	oris	%r14, %r14, 0x4800
+	stw	%r14, 0x38c(0)
+	stw	%r14, 0x48c(0)
+	sync
+	li	%r14, 0x380
+	dcbf	0, %r14
+	icbi	0, %r14
+	li	%r14, 0x480
+	dcbf	0, %r14
+	icbi	0, %r14
+	sync
+#endif
+
 ASENTRY_NOPROF(__restartkernel)
 	/*
 	 * r3-r7: arguments to go to __start
Index: sys/powerpc/include/db_machdep.h
===================================================================
--- sys/powerpc/include/db_machdep.h
+++ sys/powerpc/include/db_machdep.h
@@ -86,7 +86,3 @@
 #define	inst_store(ins)		0
 
-#ifdef __powerpc64__
-#define	DB_STOFFS(offs)		((offs) & ~DMAP_BASE_ADDRESS)
-#endif
-
 #endif /* _POWERPC_DB_MACHDEP_H_ */
Index: sys/powerpc/include/param.h
===================================================================
--- sys/powerpc/include/param.h
+++ sys/powerpc/include/param.h
@@ -109,6 +109,8 @@
 
 #define	MAXPAGESIZES	1	/* maximum number of supported page sizes */
 
+#define	RELOCATABLE_KERNEL	1	/* kernel may relocate during startup */
+
 #ifndef	KSTACK_PAGES
 #ifdef	__powerpc64__
 #define	KSTACK_PAGES		8	/* includes pcb */
Index: sys/powerpc/ofw/ofw_initrd.c
===================================================================
--- sys/powerpc/ofw/ofw_initrd.c
+++ sys/powerpc/ofw/ofw_initrd.c
@@ -36,6 +36,8 @@
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/linker.h>
+#include <machine/elf.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 
@@ -58,6 +60,8 @@
 	vm_paddr_t start, end;
 	pcell_t cell[2];
 	ssize_t size;
+	u_char *taste;
+	Elf_Ehdr ehdr;
 
 	if (!hw_direct_map)
 		return;
@@ -91,7 +95,20 @@
 	}
 
 	if (end - start > 0) {
-		mfs_root = (u_char *) PHYS_TO_DMAP(start);
+		taste = (u_char *) PHYS_TO_DMAP(start);
+		memcpy(&ehdr, taste, sizeof(ehdr));
+
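+		/*
+		 * On powernv the kernel image itself may have been passed
+		 * as the initrd (see load_external_symtab() in machdep.c);
+		 * refuse to use an ELF image as an MFS root.
+		 */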
+		if (IS_ELF(ehdr)) {
+			printf("ofw_initrd: initrd is kernel image!\n");
+			return;
+		}
+
+		mfs_root = taste;
 		mfs_root_size = end - start;
 		printf("ofw_initrd: initrd loaded at 0x%08lx-0x%08lx\n",
 		    start, end);
Index: sys/powerpc/powerpc/elf64_machdep.c
===================================================================
--- sys/powerpc/powerpc/elf64_machdep.c
+++ sys/powerpc/powerpc/elf64_machdep.c
@@ -282,10 +282,10 @@
 }
 
 bool
-elf_is_ifunc_reloc(Elf_Size r_info __unused)
+elf_is_ifunc_reloc(Elf_Size r_info)
 {
 
-	return (false);
+	return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE);
 }
 
 /* Process one elf relocation with addend. */
@@ -295,7 +295,7 @@
 {
 	Elf_Addr *where;
 	Elf_Addr addr;
-	Elf_Addr addend;
+	Elf_Addr addend, val;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
@@ -342,6 +342,14 @@
 		__asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory");
 		break;
 
+	case R_PPC_IRELATIVE:
+		/* The target is an ifunc resolver; call it for the value. */
+		addr = relocbase + addend;
+		val = ((Elf64_Addr (*)(void))addr)();
+		if (*where != val)
+			*where = val;
+		break;
+
 	default:
 		printf("kldload: unexpected relocation type %d\n",
 		    (int) rtype);
Index: sys/powerpc/powerpc/machdep.c
===================================================================
--- sys/powerpc/powerpc/machdep.c
+++ sys/powerpc/powerpc/machdep.c
@@ -161,6 +161,8 @@
 uintptr_t	powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *,
 		    uint32_t);
 
+static void	fake_preload_metadata(void);
+
 long		Maxmem = 0;
 long		realmem = 0;
@@ -246,6 +248,11 @@
 void aim_cpu_init(vm_offset_t toc);
 void booke_cpu_init(void);
 
+#ifdef DDB
+static void	load_external_symtab(void);
+static void	displace_symbol_table(vm_offset_t, vm_offset_t, vm_offset_t);
+#endif
+
 uintptr_t
 powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp,
     uint32_t mdp_cookie)
@@ -254,10 +261,13 @@
 	struct cpuref bsp;
 	vm_offset_t startkernel, endkernel;
 	char *env;
+	void *kmdp = NULL;
 	bool ofw_bootargs = false;
+	bool symbols_provided = false;
 #ifdef DDB
 	vm_offset_t ksym_start;
 	vm_offset_t ksym_end;
+	vm_offset_t ksym_sz;
 #endif
@@ -287,15 +297,29 @@
 #endif
 
 	/*
+	 * At this point, we are executing in our correct memory space.
+	 * Book-E started there, and AIM has done an rfi and restarted
+	 * execution from _start.
+	 *
+	 * We may still be in real mode, however; on 64-bit this is
+	 * possible when running out of the direct map.
+	 */
+
+	/*
 	 * Parse metadata if present and fetch parameters. Must be done
 	 * before console is inited so cninit gets the right value of
 	 * boothowto.
 	 */
 	if (mdp != NULL) {
-		void *kmdp = NULL;
+		/*
+		 * Starting up from loader.
+		 *
+		 * Full metadata has been provided, but we need to figure
+		 * out the correct address to relocate it to.
+		 */
 		char *envp = NULL;
 		uintptr_t md_offset = 0;
-		vm_paddr_t kernelendphys;
+		vm_paddr_t kernelstartphys, kernelendphys;
 
 #ifdef AIM
 		if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
@@ -306,6 +330,7 @@
 		preload_metadata = mdp;
 		if (md_offset > 0) {
+			/* Translate phys offset into DMAP offset. */
 			preload_metadata += md_offset;
 			preload_bootstrap_relocate(md_offset);
 		}
@@ -321,6 +346,9 @@
 			if (fdt != 0)
 				fdt += md_offset;
 		}
+		kernelstartphys = MD_FETCH(kmdp, MODINFO_ADDR,
+		    vm_offset_t);
+		/* kernelstartphys is already relocated. */
 		kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
 		    vm_offset_t);
 		if (kernelendphys != 0)
@@ -329,13 +357,39 @@
 #ifdef DDB
 		ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
 		ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
+		ksym_sz = *(Elf_Size *)ksym_start;
+
+		/*
+		 * Loader already handled displacing to the load
+		 * address, but we still need to displace it to the
+		 * DMAP.
+		 */
+		displace_symbol_table(
+		    (vm_offset_t)(ksym_start + sizeof(Elf_Size)),
+		    ksym_sz, md_offset);
+
 		db_fetch_ksymtab(ksym_start, ksym_end);
+		symbols_provided = true;
 #endif
 		}
 	} else {
+		/*
+		 * Self-loading kernel, we have to fake up metadata.
+		 *
+		 * Since we are creating the metadata from the final
+		 * memory space, we don't need to call
+		 * preload_bootstrap_relocate().
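+		 *
+		 * fake_preload_metadata() synthesizes the MODINFO
+		 * records (name, type, address, size) that the kernel
+		 * linker otherwise expects loader(8) to provide.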
+		 */
+		fake_preload_metadata();
+		kmdp = preload_search_by_type("elf kernel");
 		init_static_kenv(init_kenv, sizeof(init_kenv));
 		ofw_bootargs = true;
 	}
 
+	/* Store boot environment state */
 	OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
@@ -365,6 +415,11 @@
 	 */
 	OF_bootstrap();
 
+#ifdef DDB
+	if (!symbols_provided && hw_direct_map)
+		load_external_symtab();
+#endif
+
 	if (ofw_bootargs)
 		ofw_parse_bootargs();
@@ -412,6 +467,9 @@
 	 */
 	pmap_bootstrap(startkernel, endkernel);
 	mtmsr(psl_kernset & ~PSL_EE);
+#ifdef __powerpc64__
+	link_elf_ireloc(kmdp);
+#endif
 
 	/*
 	 * Initialize params/tunables that are derived from memsize
@@ -449,7 +507,181 @@
 	   (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
 }
 
+#ifdef DDB
 /*
+ * XXX Figure out where to move this.
+ */
+static void
+displace_symbol_table(vm_offset_t ksym_start, vm_offset_t ksym_sz,
+    vm_offset_t displacement)
+{
+	Elf_Sym *sym;
+
+	/*
+	 * Relocate the symbol table to our final load address.
+	 */
+	for (sym = (Elf_Sym *)ksym_start;
+	    (vm_offset_t)sym < (ksym_start + ksym_sz);
+	    sym++) {
+		if (sym->st_name == 0 ||
+		    sym->st_shndx == SHN_UNDEF ||
+		    sym->st_value == 0)
+			continue;
+		if (ELF_ST_TYPE(sym->st_info) != STT_OBJECT &&
+		    ELF_ST_TYPE(sym->st_info) != STT_FUNC &&
+		    ELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
+			continue;
+		/* Skip relocating any implausible symbols */
+		if (sym->st_value > KERNBASE)
+			sym->st_value += displacement;
+	}
+}
+
+/*
+ * On powernv, we might not have symbols loaded via loader. However, if the
+ * user passed the kernel in as the initrd as well, we can manually load it
+ * via reinterpreting the initrd copy of the kernel.
+ */
+static void
+load_external_symtab(void)
+{
+	phandle_t chosen;
+	vm_paddr_t start, end;
+	pcell_t cell[2];
+	ssize_t size;
+	u_char *kernelimg;
+	int i;
+
+	Elf_Ehdr *ehdr;
+	Elf_Shdr *shdr;
+
+	vm_offset_t ksym_start, ksym_sz, kstr_start, kstr_sz;
+
+	if (!hw_direct_map)
+		return;
+
+	chosen = OF_finddevice("/chosen");
+	if (chosen <= 0)
+		return;
+
+	if (!OF_hasprop(chosen, "linux,initrd-start") ||
+	    !OF_hasprop(chosen, "linux,initrd-end"))
+		return;
+
+	size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell));
+	if (size == 4)
+		start = cell[0];
+	else if (size == 8)
+		start = (uint64_t)cell[0] << 32 | cell[1];
+	else
+		return;
+
+	size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell));
+	if (size == 4)
+		end = cell[0];
+	else if (size == 8)
+		end = (uint64_t)cell[0] << 32 | cell[1];
+	else
+		return;
+
+	if (!(end - start > 0))
+		return;
+
+	kernelimg = (u_char *) PHYS_TO_DMAP(start);
+
+	ehdr = (Elf_Ehdr *)kernelimg;
+
+	if (!IS_ELF(*ehdr))
+		return;
+
+	shdr = (Elf_Shdr *)(kernelimg + ehdr->e_shoff);
+
+	ksym_start = 0;
+	ksym_sz = 0;
+	kstr_start = 0;
+	kstr_sz = 0;
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (shdr[i].sh_type == SHT_SYMTAB) {
+			ksym_start = (vm_offset_t)(kernelimg +
+			    shdr[i].sh_offset);
+			ksym_sz = (vm_offset_t)(shdr[i].sh_size);
+			kstr_start = (vm_offset_t)(kernelimg +
+			    shdr[shdr[i].sh_link].sh_offset);
+			kstr_sz = (vm_offset_t)
+			    (shdr[shdr[i].sh_link].sh_size);
+		}
+	}
+
+	if (ksym_start != 0 && kstr_start != 0 && ksym_sz != 0 &&
+	    kstr_sz != 0 && ksym_start < kstr_start) {
+		displace_symbol_table(ksym_start, ksym_sz,
+		    (__startkernel - KERNBASE));
+		ksymtab = ksym_start;
+		ksymtab_size = ksym_sz;
+		kstrtab = kstr_start;
+	}
+}
+#endif
+
+/*
+ * When not being loaded from loader, we need to create our own metadata
+ * so we can interact with the kernel linker.
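+ *
+ * The layout must match what loader(8) builds: a packed sequence of
+ * (type, length, value) records terminated by a record whose type and
+ * length are both zero.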
linker. + */ +static void +fake_preload_metadata(void) { + /* We depend on dword alignment here. */ + static uint32_t fake_preload[36] __aligned(8); + int i = 0; + + fake_preload[i++] = MODINFO_NAME; + fake_preload[i++] = strlen("kernel") + 1; + strcpy((char*)&fake_preload[i], "kernel"); + /* ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */ + i += 2; + + fake_preload[i++] = MODINFO_TYPE; + fake_preload[i++] = strlen("elf kernel") + 1; + strcpy((char*)&fake_preload[i], "elf kernel"); + /* ['e' 'l' 'f' ' '] ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */ + i += 3; + +#ifdef __powerpc64__ + /* Padding -- Fields start on u_long boundaries */ + fake_preload[i++] = 0; +#endif + + fake_preload[i++] = MODINFO_ADDR; + fake_preload[i++] = sizeof(vm_offset_t); + *(vm_offset_t *)&fake_preload[i] = + (vm_offset_t)(__startkernel); + i += (sizeof(vm_offset_t) / 4); + + fake_preload[i++] = MODINFO_SIZE; + fake_preload[i++] = sizeof(vm_offset_t); + *(vm_offset_t *)&fake_preload[i] = + (vm_offset_t)(__endkernel) - (vm_offset_t)(__startkernel); + i += (sizeof(vm_offset_t) / 4); + + /* + * MODINFOMD_SSYM and MODINFOMD_ESYM cannot be provided here, + * as the memory comes from outside the loaded ELF sections. + * + * If the symbols are being provided by other means (MFS), the + * tables will be loaded into the debugger directly. + */ + + /* Null field at end to mark end of data. */ + fake_preload[i++] = 0; + fake_preload[i] = 0; + preload_metadata = (void*)fake_preload; +} + +/* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */