Index: head/sys/conf/kern.pre.mk
===================================================================
--- head/sys/conf/kern.pre.mk
+++ head/sys/conf/kern.pre.mk
@@ -161,9 +161,9 @@
 .endif

 .if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \
-    ${MACHINE_CPUARCH} == "i386") && \
+    ${MACHINE_CPUARCH} == "i386" || ${MACHINE} == "powerpc") && \
     defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mifunc} == ""
-.error amd64/arm64/i386 kernel requires linker ifunc support
+.error amd64/arm64/i386/ppc* kernel requires linker ifunc support
 .endif
 .if ${MACHINE_CPUARCH} == "amd64"
 LDFLAGS+=	-z max-page-size=2097152

Index: head/sys/conf/ldscript.powerpc
===================================================================
--- head/sys/conf/ldscript.powerpc
+++ head/sys/conf/ldscript.powerpc
@@ -6,6 +6,11 @@
 ENTRY(__start)
 SEARCH_DIR(/usr/lib);
 PROVIDE (__stack = 0);
+PHDRS
+{
+	kernel PT_LOAD;
+	dynamic PT_DYNAMIC;
+}
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
@@ -21,7 +26,7 @@
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
     *(.gnu.linkonce.t*)
-  } =0
+  } :kernel =0
   _etext = .;
   PROVIDE (etext = .);
@@ -77,7 +82,7 @@
   .got.plt : { *(.got.plt) }
-  .dynamic : { *(.dynamic) }
+  .dynamic : { *(.dynamic) } :kernel :dynamic
   /* Put .ctors and .dtors next to the .got2 section, so that the pointers
      get relocated with -mrelocatable. Also put in the .fixup pointers.
      The current compiler no longer needs this, but keep it around for 2.7.2 */
@@ -96,7 +101,7 @@
   /* We want the small data sections together, so single-instruction offsets
      can access them all, and initialized data all before uninitialized, so
      we can shorten the on-disk segment size. */
-  .sdata : { *(.sdata) }
+  .sdata : { *(.sdata) } :kernel
   _edata = .;
   PROVIDE (edata = .);
   .sbss :

Index: head/sys/conf/ldscript.powerpc64
===================================================================
--- head/sys/conf/ldscript.powerpc64
+++ head/sys/conf/ldscript.powerpc64
@@ -8,15 +8,15 @@
 PROVIDE (__stack = 0);
 PHDRS
 {
-	text PT_LOAD ;
-	dynamic PT_DYNAMIC ;
+	kernel PT_LOAD;
+	dynamic PT_DYNAMIC;
 }
 SECTIONS
 {
   /* Low-address wrapper for bootloaders (kexec/kboot) that can't parse ELF */
   . = kernbase - 0x100;
-  .kboot : { *(.text.kboot) } :text
+  .kboot : { *(.text.kboot) } :kernel

   /* Read-only sections, merged into text segment: */
   . = kernbase;
@@ -106,7 +106,7 @@
   .got : ALIGN(8) { __tocbase = .; *(.got) }
   .toc : ALIGN(8) { *(.toc) }
-  .dynamic : { *(.dynamic) } :text :dynamic
+  .dynamic : { *(.dynamic) } :kernel :dynamic
   /* Put .ctors and .dtors next to the .got2 section, so that the pointers
      get relocated with -mrelocatable. Also put in the .fixup pointers.
      The current compiler no longer needs this, but keep it around for 2.7.2 */
@@ -125,7 +125,7 @@
   /* We want the small data sections together, so single-instruction offsets
      can access them all, and initialized data all before uninitialized, so
      we can shorten the on-disk segment size. */
-  .sdata : { *(.sdata) }
+  .sdata : { *(.sdata) } :kernel
   _edata = .;
   PROVIDE (edata = .);
   .sbss :
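Note on the PHDRS changes: each script now declares exactly one loadable segment
(renamed "kernel") plus a PT_DYNAMIC segment, and the :kernel/:dynamic
assignments pin .text, .dynamic, and .sdata to them. Anything that must find the
dynamic section without parsing section headers — the kexec/kboot path mentioned
in ldscript.powerpc64, or the kernel relocating itself at boot — can then walk
the program header table. A minimal sketch of that walk, not part of this patch;
it assumes a flat in-memory copy of the ELF file and the usual Elf64_* types
(e.g. from <sys/elf64.h> on FreeBSD):

/* Hedged sketch: locate .dynamic through PT_DYNAMIC, section-blind. */
#include <sys/types.h>
#include <sys/elf64.h>

static const Elf64_Dyn *
find_dynamic(const u_char *img)
{
	const Elf64_Ehdr *eh = (const Elf64_Ehdr *)img;
	const Elf64_Phdr *ph = (const Elf64_Phdr *)(img + eh->e_phoff);
	int i;

	for (i = 0; i < eh->e_phnum; i++)
		if (ph[i].p_type == PT_DYNAMIC)
			/* p_offset is a file offset; valid for a flat image. */
			return ((const Elf64_Dyn *)(img + ph[i].p_offset));
	return (NULL);
}
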
Index: head/sys/conf/ldscript.powerpcspe
===================================================================
--- head/sys/conf/ldscript.powerpcspe
+++ head/sys/conf/ldscript.powerpcspe
@@ -6,6 +6,11 @@
 ENTRY(__start)
 SEARCH_DIR(/usr/lib);
 PROVIDE (__stack = 0);
+PHDRS
+{
+	kernel PT_LOAD;
+	dynamic PT_DYNAMIC;
+}
 SECTIONS
 {
   /* Read-only sections, merged into text segment: */
@@ -21,7 +26,7 @@
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
     *(.gnu.linkonce.t*)
-  } =0
+  } :kernel =0
   _etext = .;
   PROVIDE (etext = .);
@@ -78,7 +83,7 @@
   .got.plt : { *(.got.plt) }
-  .dynamic : { *(.dynamic) }
+  .dynamic : { *(.dynamic) } :kernel :dynamic
   /* Put .ctors and .dtors next to the .got2 section, so that the pointers
      get relocated with -mrelocatable. Also put in the .fixup pointers.
      The current compiler no longer needs this, but keep it around for 2.7.2 */
@@ -97,7 +102,7 @@
   /* We want the small data sections together, so single-instruction offsets
      can access them all, and initialized data all before uninitialized, so
      we can shorten the on-disk segment size. */
-  .sdata : { *(.sdata) }
+  .sdata : { *(.sdata) } :kernel
   _edata = .;
   PROVIDE (edata = .);
   .sbss :

Index: head/sys/kern/link_elf.c
===================================================================
--- head/sys/kern/link_elf.c
+++ head/sys/kern/link_elf.c
@@ -388,7 +388,9 @@
 	return (0);
 }

+#ifdef RELOCATABLE_KERNEL
 extern vm_offset_t __startkernel, __endkernel;
+#endif

 static unsigned long kern_relbase = KERNBASE;

@@ -424,7 +426,7 @@
 	ef = (elf_file_t) linker_kernel_file;
 	ef->preloaded = 1;
-#ifdef __powerpc__
+#ifdef RELOCATABLE_KERNEL
 	ef->address = (caddr_t) (__startkernel - KERNBASE);
 #else
 	ef->address = 0;
@@ -436,7 +438,7 @@
 	if (dp != NULL)
 		parse_dynamic(ef);
-#ifdef __powerpc__
+#ifdef RELOCATABLE_KERNEL
 	linker_kernel_file->address = (caddr_t)__startkernel;
 	linker_kernel_file->size = (intptr_t)(__endkernel - __startkernel);
 	kern_relbase = (unsigned long)__startkernel;
@@ -1860,7 +1862,7 @@
 	return (ef->ddbstrcnt);
 }

-#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__)
+#if defined(__i386__) || defined(__amd64__) || defined(__aarch64__) || defined(__powerpc__)
 /*
  * Use this lookup routine when performing relocations early during boot.
  * The generic lookup routine depends on kobj, which is not initialized
@@ -1896,8 +1898,14 @@
 	ef->modptr = kmdp;
 	ef->dynamic = (Elf_Dyn *)&_DYNAMIC;
-	parse_dynamic(ef);
+
+#ifdef RELOCATABLE_KERNEL
+	ef->address = (caddr_t) (__startkernel - KERNBASE);
+#else
 	ef->address = 0;
+#endif
+	parse_dynamic(ef);
+
 	link_elf_preload_parse_symbols(ef);
 	relocate_file1(ef, elf_lookup_ifunc, elf_reloc, true);
 }
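Note on the link_elf.c changes: they generalize what was a powerpc-only hack.
Under RELOCATABLE_KERNEL the kernel may run displaced from its linked KERNBASE,
and ef->address (and kern_relbase) record that single displacement so linked
symbol values can be adjusted. A hedged sketch of the arithmetic, with
illustrative constants rather than FreeBSD's actual values:

/* Hedged sketch of the RELOCATABLE_KERNEL displacement bookkeeping. */
#include <stdint.h>

#define KERNBASE_LINKED		0x0000000000100000UL	/* link-time base */
#define STARTKERNEL_RUNTIME	0xc000000000100000UL	/* e.g. in the DMAP */

/* ef->address = __startkernel - KERNBASE boils down to this one offset. */
static uint64_t
linked_to_runtime(uint64_t linked_addr)
{
	return (linked_addr + (STARTKERNEL_RUNTIME - KERNBASE_LINKED));
}
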
Index: head/sys/powerpc/aim/aim_machdep.c
===================================================================
--- head/sys/powerpc/aim/aim_machdep.c
+++ head/sys/powerpc/aim/aim_machdep.c
@@ -161,6 +161,7 @@
 extern void *ap_pcpu;
 extern void __restartkernel(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);
+extern void __restartkernel_virtual(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t, register_t offset, register_t msr);

 void aim_early_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie);
@@ -184,13 +185,22 @@

 #ifdef __powerpc64__
 	/*
-	 * If in real mode, relocate to high memory so that the kernel
+	 * Relocate to high memory so that the kernel
 	 * can execute from the direct map.
+	 *
+	 * If we are in virtual mode already, use a special entry point
+	 * that sets up a temporary DMAP to execute from until we can
+	 * properly set up the MMU.
 	 */
-	if (!(mfmsr() & PSL_DR) &&
-	    (vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS)
-		__restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
-		    DMAP_BASE_ADDRESS, mfmsr());
+	if ((vm_offset_t)&aim_early_init < DMAP_BASE_ADDRESS) {
+		if (mfmsr() & PSL_DR) {
+			__restartkernel_virtual(fdt, 0, ofentry, mdp,
+			    mdp_cookie, DMAP_BASE_ADDRESS, mfmsr());
+		} else {
+			__restartkernel(fdt, 0, ofentry, mdp, mdp_cookie,
+			    DMAP_BASE_ADDRESS, mfmsr());
+		}
+	}
 #endif

 	/* Various very early CPU fix ups */

Index: head/sys/powerpc/aim/locore64.S
===================================================================
--- head/sys/powerpc/aim/locore64.S
+++ head/sys/powerpc/aim/locore64.S
@@ -200,6 +200,57 @@
 	/* Unreachable */
 	b .

+ASENTRY_NOPROF(__restartkernel_virtual)
+	/*
+	 * When coming in via this entry point, we need to alter the SLB to
+	 * shadow the segment register emulation entries in DMAP space.
+	 * We need to do this dance because we are running with virtual-mode
+	 * OpenFirmware and have not yet taken over the MMU.
+	 *
+	 * Assumptions:
+	 * 1) The kernel is currently identity-mapped.
+	 * 2) We are currently executing at an address compatible with
+	 *    real mode.
+	 * 3) The first 16 SLB entries are emulating SRs.
+	 * 4) The rest of the SLB is not in use.
+	 * 5) OpenFirmware is not manipulating the SLB at runtime.
+	 * 6) We are running on 64-bit AIM.
+	 *
+	 * Tested on a G5.
+	 */
+	mfmsr	%r14
+	/* Switch to real mode because we are about to mess with the SLB. */
+	andi.	%r14, %r14, ~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l
+	mtmsr	%r14
+	isync
+	/* Prepare variables for later use. */
+	li	%r14, 0
+	li	%r18, 0
+	oris	%r18, %r18, 0xc000
+	sldi	%r18, %r18, 32		/* r18: 0xc000000000000000 */
+1:
+	/*
+	 * Loop over the first 16 SLB entries.
+	 * Offset the SLBE into the DMAP, add 16 to the index, and write
+	 * it back to the SLB.
+	 */
+	/* XXX add more safety checks */
+	slbmfev	%r15, %r14
+	slbmfee	%r16, %r14
+	or	%r16, %r16, %r14	/* index is 0-15 */
+	ori	%r16, %r16, 0x10	/* add 16 to index. */
+	or	%r16, %r16, %r18	/* SLBE DMAP offset */
+	rldicr	%r17, %r16, 0, 37	/* Invalidation SLBE */
+
+	isync
+	slbie	%r17
+	/* isync */
+	slbmte	%r15, %r16
+	isync
+	addi	%r14, %r14, 1
+	cmpdi	%r14, 16
+	blt	1b
+
 ASENTRY_NOPROF(__restartkernel)
 	/*
 	 * r3-r7: arguments to go to __start
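Note on the SLB loop above: the bit manipulation is compact, so here is a C
model of one iteration, one line per instruction. This is a sketch only — the
register values and the slbie mask interpretation are read off the assembly
itself, not from a separate specification:

/* C model of one iteration of the SLB shadowing loop (sketch only). */
#include <stdint.h>

#define DMAP_ESID_BITS	0xc000000000000000UL	/* r18 in the asm */

static uint64_t
dmap_shadow_slbe(uint64_t slbe, unsigned int idx, uint64_t *inval)
{
	slbe |= idx;		/* or   %r16,%r16,%r14: index is 0-15 */
	slbe |= 0x10;		/* ori  %r16,%r16,0x10: index += 16 */
	slbe |= DMAP_ESID_BITS;	/* or   %r16,%r16,%r18: ESID into the DMAP */
	/* rldicr %r17,%r16,0,37: keep the top 38 bits (the ESID) for slbie. */
	*inval = slbe & ~((1UL << 26) - 1);
	return (slbe);		/* written back paired with the unmodified SLBV */
}
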
Index: head/sys/powerpc/include/db_machdep.h
===================================================================
--- head/sys/powerpc/include/db_machdep.h
+++ head/sys/powerpc/include/db_machdep.h
@@ -85,8 +85,4 @@
 #define	inst_load(ins)		0
 #define	inst_store(ins)		0

-#ifdef __powerpc64__
-#define DB_STOFFS(offs)		((offs) & ~DMAP_BASE_ADDRESS)
-#endif
-
 #endif /* _POWERPC_DB_MACHDEP_H_ */

Index: head/sys/powerpc/include/param.h
===================================================================
--- head/sys/powerpc/include/param.h
+++ head/sys/powerpc/include/param.h
@@ -109,6 +109,8 @@

 #define	MAXPAGESIZES	1	/* maximum number of supported page sizes */

+#define	RELOCATABLE_KERNEL	1	/* kernel may relocate during startup */
+
 #ifndef KSTACK_PAGES
 #ifdef __powerpc64__
 #define	KSTACK_PAGES	8	/* includes pcb */

Index: head/sys/powerpc/ofw/ofw_initrd.c
===================================================================
--- head/sys/powerpc/ofw/ofw_initrd.c
+++ head/sys/powerpc/ofw/ofw_initrd.c
@@ -36,6 +36,8 @@
 #include
 #include
+#include
+#include
 #include
 #include
@@ -58,6 +60,8 @@
 	vm_paddr_t start, end;
 	pcell_t cell[2];
 	ssize_t size;
+	u_char *taste;
+	Elf_Ehdr ehdr;

 	if (!hw_direct_map)
 		return;
@@ -91,7 +95,15 @@
 	}

 	if (end - start > 0) {
-		mfs_root = (u_char *) PHYS_TO_DMAP(start);
+		taste = (u_char*) PHYS_TO_DMAP(start);
+		memcpy(&ehdr, taste, sizeof(ehdr));
+
+		if (IS_ELF(ehdr)) {
+			printf("ofw_initrd: initrd is kernel image!\n");
+			return;
+		}
+
+		mfs_root = taste;
 		mfs_root_size = end - start;
 		printf("ofw_initrd: initrd loaded at 0x%08lx-0x%08lx\n",
 		    start, end);
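Note on the ofw_initrd.c hunk: it "tastes" the first bytes of the initrd so
that a kernel image passed in as the initrd (the trick load_external_symtab()
in machdep.c below relies on) is not mounted as the MFS root. IS_ELF() simply
compares the four ELF identification bytes; an equivalent standalone test, as
a sketch:

/* Sketch of the IS_ELF() taste: match the 4-byte ELF magic. */
#include <stdint.h>
#include <string.h>

static int
looks_like_elf(const uint8_t *img)
{
	static const uint8_t elfmag[4] = { 0x7f, 'E', 'L', 'F' };

	return (memcmp(img, elfmag, sizeof(elfmag)) == 0);
}
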
Index: head/sys/powerpc/powerpc/elf32_machdep.c
===================================================================
--- head/sys/powerpc/powerpc/elf32_machdep.c
+++ head/sys/powerpc/powerpc/elf32_machdep.c
@@ -221,10 +221,10 @@

 #ifndef __powerpc64__
 bool
-elf_is_ifunc_reloc(Elf_Size r_info __unused)
+elf_is_ifunc_reloc(Elf_Size r_info)
 {

-	return (false);
+	return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE);
 }

 /* Process one elf relocation with addend. */
@@ -235,7 +235,7 @@
 	Elf_Addr *where;
 	Elf_Half *hwhere;
 	Elf_Addr addr;
-	Elf_Addr addend;
+	Elf_Addr addend, val;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
@@ -315,6 +315,13 @@
 		if (error != 0)
 			return -1;
 		*where = elf_relocaddr(lf, addr + addend);
+		break;
+
+	case R_PPC_IRELATIVE:
+		addr = relocbase + addend;
+		val = ((Elf32_Addr (*)(void))addr)();
+		if (*where != val)
+			*where = val;
 		break;

 	default:

Index: head/sys/powerpc/powerpc/elf64_machdep.c
===================================================================
--- head/sys/powerpc/powerpc/elf64_machdep.c
+++ head/sys/powerpc/powerpc/elf64_machdep.c
@@ -282,10 +282,10 @@
 }

 bool
-elf_is_ifunc_reloc(Elf_Size r_info __unused)
+elf_is_ifunc_reloc(Elf_Size r_info)
 {

-	return (false);
+	return (ELF_R_TYPE(r_info) == R_PPC_IRELATIVE);
 }

 /* Process one elf relocation with addend. */
@@ -295,7 +295,7 @@
 {
 	Elf_Addr *where;
 	Elf_Addr addr;
-	Elf_Addr addend;
+	Elf_Addr addend, val;
 	Elf_Word rtype, symidx;
 	const Elf_Rela *rela;
 	int error;
@@ -340,6 +340,13 @@
 		*where = addr;
 #endif
 		__asm __volatile("dcbst 0,%0; sync" :: "r"(where) : "memory");
+		break;
+
+	case R_PPC_IRELATIVE:
+		addr = relocbase + addend;
+		val = ((Elf64_Addr (*)(void))addr)();
+		if (*where != val)
+			*where = val;
 		break;

 	default:
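Note on the new R_PPC_IRELATIVE cases: relocbase + addend is the address of an
ifunc resolver, which is called and whose return value is stored into the
relocated slot. For context, this is the sort of C source that produces such
relocations; every name and the feature flag here are hypothetical, and a
toolchain with ifunc support is assumed (exactly what the kern.pre.mk check at
the top of this patch enforces):

/* Hypothetical ifunc example; compiles to an IRELATIVE relocation. */
static int cpu_has_vsx;		/* hypothetical feature flag */

static int
copy_generic(void *dst, const void *src, int len)
{
	char *d = dst;
	const char *s = src;

	while (len-- > 0)
		*d++ = *s++;
	return (0);
}

static int
copy_vsx(void *dst, const void *src, int len)
{
	/* Stand-in for a VSX-accelerated variant. */
	return (copy_generic(dst, src, len));
}

/*
 * The resolver runs once, when link_elf_ireloc()/relocate_file1()
 * processes the IRELATIVE relocation; its return value is what the
 * "*where = val" assignments above write into the resolved slot.
 */
static int (*resolve_copy(void))(void *, const void *, int)
{
	return (cpu_has_vsx ? copy_vsx : copy_generic);
}

int fast_copy(void *, const void *, int)
    __attribute__((ifunc("resolve_copy")));
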
Index: head/sys/powerpc/powerpc/machdep.c
===================================================================
--- head/sys/powerpc/powerpc/machdep.c
+++ head/sys/powerpc/powerpc/machdep.c
@@ -113,6 +113,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -161,6 +162,8 @@
 uintptr_t powerpc_init(vm_offset_t, vm_offset_t, vm_offset_t, void *, uint32_t);

+static void fake_preload_metadata(void);
+
 long	Maxmem = 0;
 long	realmem = 0;
@@ -246,6 +249,11 @@
 void aim_cpu_init(vm_offset_t toc);
 void booke_cpu_init(void);

+#ifdef DDB
+static void load_external_symtab(void);
+static void displace_symbol_table(vm_offset_t, vm_offset_t, vm_offset_t);
+#endif
+
 uintptr_t
 powerpc_init(vm_offset_t fdt, vm_offset_t toc, vm_offset_t ofentry, void *mdp, uint32_t mdp_cookie)
@@ -254,10 +262,13 @@
 {
 	struct cpuref bsp;
 	vm_offset_t startkernel, endkernel;
 	char *env;
+	void *kmdp = NULL;
 	bool	ofw_bootargs = false;
+	bool	symbols_provided = false;
 #ifdef DDB
 	vm_offset_t ksym_start;
 	vm_offset_t ksym_end;
+	vm_offset_t ksym_sz;
 #endif

 	/* First guess at start/end kernel positions */
@@ -287,15 +298,29 @@
 #endif

 	/*
+	 * At this point, we are executing in our correct memory space.
+	 * Book-E started there, and AIM has done an rfi and restarted
+	 * execution from _start.
+	 *
+	 * We may still be in real mode, however. If we are running out of
+	 * the direct map on 64 bit, this is possible to do.
+	 */
+
+	/*
 	 * Parse metadata if present and fetch parameters. Must be done
 	 * before console is inited so cninit gets the right value of
 	 * boothowto.
 	 */
 	if (mdp != NULL) {
-		void *kmdp = NULL;
+		/*
+		 * Starting up from loader.
+		 *
+		 * Full metadata has been provided, but we need to figure
+		 * out the correct address to relocate it to.
+		 */
 		char *envp = NULL;
 		uintptr_t md_offset = 0;
-		vm_paddr_t kernelendphys;
+		vm_paddr_t kernelstartphys, kernelendphys;

 #ifdef AIM
 		if ((uintptr_t)&powerpc_init > DMAP_BASE_ADDRESS)
@@ -306,6 +331,7 @@
 		preload_metadata = mdp;
 		if (md_offset > 0) {
+			/* Translate phys offset into DMAP offset. */
 			preload_metadata += md_offset;
 			preload_bootstrap_relocate(md_offset);
 		}
@@ -321,6 +347,9 @@
 			if (fdt != 0)
 				fdt += md_offset;
 		}

+		kernelstartphys = MD_FETCH(kmdp, MODINFO_ADDR,
+		    vm_offset_t);
+		/* kernelstartphys is already relocated. */
 		kernelendphys = MD_FETCH(kmdp, MODINFOMD_KERNEND,
 		    vm_offset_t);
 		if (kernelendphys != 0)
@@ -329,13 +358,35 @@
 #ifdef DDB
 		ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
 		ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t);
+		ksym_sz = *(Elf_Size*)ksym_start;
+
+		/*
+		 * Loader already handled displacing to the load
+		 * address, but we still need to displace it to the
+		 * DMAP.
+		 */
+		displace_symbol_table(
+		    (vm_offset_t)(ksym_start + sizeof(Elf_Size)),
+		    ksym_sz, md_offset);
+
 		db_fetch_ksymtab(ksym_start, ksym_end);
+		symbols_provided = true;
 #endif
 		}
 	} else {
+		/*
+		 * Self-loading kernel, we have to fake up metadata.
+		 *
+		 * Since we are creating the metadata from the final
+		 * memory space, we don't need to call
+		 * preload_bootstrap_relocate().
+		 */
+		fake_preload_metadata();
+		kmdp = preload_search_by_type("elf kernel");
 		init_static_kenv(init_kenv, sizeof(init_kenv));
 		ofw_bootargs = true;
 	}

+	/* Store boot environment state */
 	OF_initial_setup((void *)fdt, NULL, (int (*)(void *))ofentry);
@@ -365,6 +416,11 @@
 	 */
 	OF_bootstrap();

+#ifdef DDB
+	if (!symbols_provided && hw_direct_map)
+		load_external_symtab();
+#endif
+
 	if (ofw_bootargs)
 		ofw_parse_bootargs();
@@ -412,6 +468,7 @@
 	 */
 	pmap_bootstrap(startkernel, endkernel);
 	mtmsr(psl_kernset & ~PSL_EE);
+	link_elf_ireloc(kmdp);

 	/*
 	 * Initialize params/tunables that are derived from memsize
@@ -447,6 +504,178 @@

 	return (((uintptr_t)thread0.td_pcb -
 	    (sizeof(struct callframe) - 3*sizeof(register_t))) & ~15UL);
+}
+
+#ifdef DDB
+/*
+ * XXX Figure out where to move this.
+ */
+static void
+displace_symbol_table(vm_offset_t ksym_start,
+    vm_offset_t ksym_sz, vm_offset_t displacement) {
+	Elf_Sym *sym;
+
+	/*
+	 * Relocate the symbol table to our final load address.
+	 */
+	for (sym = (Elf_Sym *)ksym_start;
+	    (vm_paddr_t)sym < (ksym_start + ksym_sz);
+	    sym++) {
+		if (sym->st_name == 0 ||
+		    sym->st_shndx == SHN_UNDEF ||
+		    sym->st_value == 0)
+			continue;
+		if (ELF_ST_TYPE(sym->st_info) != STT_OBJECT &&
+		    ELF_ST_TYPE(sym->st_info) != STT_FUNC &&
+		    ELF_ST_TYPE(sym->st_info) != STT_NOTYPE)
+			continue;
+		/* Skip relocating any implausible symbols */
+		if (sym->st_value > KERNBASE)
+			sym->st_value += displacement;
+	}
+}
+
+/*
+ * On powernv, we might not have symbols loaded via loader. However, if the
+ * user passed the kernel in as the initrd as well, we can manually load it
+ * via reinterpreting the initrd copy of the kernel.
+ */
+static void
+load_external_symtab(void) {
+	phandle_t chosen;
+	vm_paddr_t start, end;
+	pcell_t cell[2];
+	ssize_t size;
+	u_char *kernelimg;
+
+	int i;
+
+	Elf_Ehdr *ehdr;
+	Elf_Phdr *phdr;
+	Elf_Shdr *shdr;
+
+	vm_offset_t ksym_start, ksym_sz, kstr_start, kstr_sz;
+
+	if (!hw_direct_map)
+		return;
+
+	chosen = OF_finddevice("/chosen");
+	if (chosen <= 0)
+		return;
+
+	if (!OF_hasprop(chosen, "linux,initrd-start") ||
+	    !OF_hasprop(chosen, "linux,initrd-end"))
+		return;
+
+	size = OF_getencprop(chosen, "linux,initrd-start", cell, sizeof(cell));
+	if (size == 4)
+		start = cell[0];
+	else if (size == 8)
+		start = (uint64_t)cell[0] << 32 | cell[1];
+	else
+		return;
+
+	size = OF_getencprop(chosen, "linux,initrd-end", cell, sizeof(cell));
+	if (size == 4)
+		end = cell[0];
+	else if (size == 8)
+		end = (uint64_t)cell[0] << 32 | cell[1];
+	else
+		return;
+
+	if (!(end - start > 0))
+		return;
+
+	kernelimg = (u_char *) PHYS_TO_DMAP(start);
+
+	ehdr = (Elf_Ehdr *)kernelimg;
+
+	if (!IS_ELF(*ehdr))
+		return;
+
+	phdr = (Elf_Phdr *)(kernelimg + ehdr->e_phoff);
+	shdr = (Elf_Shdr *)(kernelimg + ehdr->e_shoff);
+
+	ksym_start = 0;
+	ksym_sz = 0;
+	kstr_start = 0;
+	kstr_sz = 0;
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (shdr[i].sh_type == SHT_SYMTAB) {
+			ksym_start = (vm_offset_t)(kernelimg +
+			    shdr[i].sh_offset);
+			ksym_sz = (vm_offset_t)(shdr[i].sh_size);
+			kstr_start = (vm_offset_t)(kernelimg +
+			    shdr[shdr[i].sh_link].sh_offset);
+			kstr_sz = (vm_offset_t)
+			    (shdr[shdr[i].sh_link].sh_size);
+		}
+	}
+
+	if (ksym_start != 0 && kstr_start != 0 && ksym_sz != 0 &&
+	    kstr_sz != 0 && ksym_start < kstr_start) {
+
+		displace_symbol_table(ksym_start, ksym_sz,
+		    (__startkernel - KERNBASE));
+		ksymtab = ksym_start;
+		ksymtab_size = ksym_sz;
+		kstrtab = kstr_start;
+	}
+
+}
+#endif
+
+/*
+ * When not being loaded from loader, we need to create our own metadata
+ * so we can interact with the kernel linker.
+ */
+static void
+fake_preload_metadata(void) {
+	/* We depend on dword alignment here. */
+	static uint32_t fake_preload[36] __aligned(8);
+	int i = 0;
+
+	fake_preload[i++] = MODINFO_NAME;
+	fake_preload[i++] = strlen("kernel") + 1;
+	strcpy((char*)&fake_preload[i], "kernel");
+	/* ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
+	i += 2;
+
+	fake_preload[i++] = MODINFO_TYPE;
+	fake_preload[i++] = strlen("elf kernel") + 1;
+	strcpy((char*)&fake_preload[i], "elf kernel");
+	/* ['e' 'l' 'f' ' '] ['k' 'e' 'r' 'n'] ['e' 'l' '\0' ..] */
+	i += 3;
+
+#ifdef __powerpc64__
+	/* Padding -- Fields start on u_long boundaries */
+	fake_preload[i++] = 0;
+#endif
+
+	fake_preload[i++] = MODINFO_ADDR;
+	fake_preload[i++] = sizeof(vm_offset_t);
+	*(vm_offset_t *)&fake_preload[i] =
+	    (vm_offset_t)(__startkernel);
+	i += (sizeof(vm_offset_t) / 4);
+
+	fake_preload[i++] = MODINFO_SIZE;
+	fake_preload[i++] = sizeof(vm_offset_t);
+	*(vm_offset_t *)&fake_preload[i] =
+	    (vm_offset_t)(__endkernel) - (vm_offset_t)(__startkernel);
+	i += (sizeof(vm_offset_t) / 4);
+
+	/*
+	 * MODINFOMD_SSYM and MODINFOMD_ESYM cannot be provided here,
+	 * as the memory comes from outside the loaded ELF sections.
+	 *
+	 * If the symbols are being provided by other means (MFS), the
+	 * tables will be loaded into the debugger directly.
+	 */
+
+	/* Null field at end to mark end of data. */
+	fake_preload[i++] = 0;
+	fake_preload[i] = 0;
+	preload_metadata = (void*)fake_preload;
+}

 /*
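Note on the metadata blob fake_preload_metadata() assembles: it is consumed by
the standard preload walker, a chain of (type, size, payload) records padded so
each record starts u_long aligned and terminated by two zero words — which is
why the function ends by writing two zeros. A simplified model of that walk,
patterned after sys/kern/subr_module.c but not a copy of it:

/* Sketch of how the (type, size, payload) preload records are walked. */
#include <stdint.h>
#include <stddef.h>

static void *
preload_find(char *md, uint32_t want_type)
{
	uint32_t *hdr;
	size_t next;

	for (;;) {
		hdr = (uint32_t *)md;
		if (hdr[0] == 0 && hdr[1] == 0)	/* two-zero terminator */
			return (NULL);
		if (hdr[0] == want_type)
			return (hdr + 2);	/* payload follows the header */
		/* Skip header + payload, rounded up to u_long alignment. */
		next = 2 * sizeof(uint32_t) + hdr[1];
		next = (next + sizeof(unsigned long) - 1) &
		    ~(sizeof(unsigned long) - 1);
		md += next;
	}
}

This is how the kmdp = preload_search_by_type("elf kernel") call added to
powerpc_init() finds the fake module entry and hands it to link_elf_ireloc().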