Index: sys/amd64/amd64/kload.c =================================================================== --- /dev/null +++ sys/amd64/amd64/kload.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2011 - 2015 + * Russell Cattelan Digital Elves LLC + * Copyright (c) 2012 - 2015 + * EMC Corp / Isilon Systems Division All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#define GUEST_NULL_SEL 0 +#define GUEST_CODE_SEL 1 +#define GUEST_DATA_SEL 2 + +/* Set up the GDT the same as what the boot loader uses; + one notable change is to turn on write access to + the data segment so we can write to it during the + final relocate_kernel phase. + sys/boot/i386/libi386/amd64_tramp.S + gdt: + .long 0 # null descriptor + .long 0 + .long 0x00000000 # %cs + .long 0x00209800 + .long 0x00000000 # %ds + .long 0x00008000 +*/ + +void +setup_freebsd_gdt(uint64_t *gdtr) +{ + gdtr[GUEST_NULL_SEL] = 0x0000000000000000; + gdtr[GUEST_CODE_SEL] = 0x0020980000000000; + gdtr[GUEST_DATA_SEL] = 0x0000920000000000; +} + +/* + * Build a temporary 4-level page table in which every 1GB slot of the + * virtual space maps the first 1GB of physical memory (2MB PG_PS pages). + * Returns the PHYSICAL address of the root (PML4) page. + */ +pt_entry_t * +kload_build_page_table(void) +{ + pt_entry_t *PT4; + pt_entry_t *PT3; + pt_entry_t *PT2; + int i; + pt_entry_t va; + + va = kmem_malloc(kernel_arena, PAGE_SIZE * 3, M_ZERO | M_WAITOK); + PT4 = (pt_entry_t *)va; + PT3 = (pt_entry_t *)(PT4 + (PAGE_SIZE / sizeof(unsigned long))); + PT2 = (pt_entry_t *)(PT3 + (PAGE_SIZE / sizeof(unsigned long))); + + if (bootverbose) + printf("%s PT4 0x%lx (0x%lx) PT3 0x%lx (0x%lx) " + "PT2 0x%lx (0x%lx)\n", + __func__, + (unsigned long)PT4, (unsigned long)vtophys(PT4), + (unsigned long)PT3, (unsigned long)vtophys(PT3), + (unsigned long)PT2, (unsigned long)vtophys(PT2)); + + /* + * The following section is a direct copy of + * head/src/sys/boot/i386/libi386/elf64_freebsd.c:92 at r236688 + */ + + bzero(PT4, PAGE_SIZE); + bzero(PT3, PAGE_SIZE); + bzero(PT2, PAGE_SIZE); + + /* + * This is kinda brutal, but every single 1GB VM memory segment points + * to the same first 1GB of physical memory. But it is more than + * adequate. 
+ */ + for (i = 0; i < 512; i++) { + /* + * Each slot of the level 4 pages points to the + * same level 3 page + */ + PT4[i] = (pt_entry_t)(vtophys(PT3)); + PT4[i] |= PG_V | PG_RW | PG_U; + + /* + * Each slot of the level 3 pages points to the + * same level 2 page + */ + PT3[i] = (pt_entry_t)(vtophys(PT2)); + PT3[i] |= PG_V | PG_RW | PG_U; + + /* The level 2 page slots are mapped with 2MB pages for 1GB. */ + PT2[i] = i * (2 * 1024 * 1024); + PT2[i] |= PG_V | PG_RW | PG_PS | PG_U; + } + return ((pt_entry_t *)vtophys(PT4)); +} Index: sys/amd64/amd64/kload_exec.S =================================================================== --- /dev/null +++ sys/amd64/amd64/kload_exec.S @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2011 - 2015 + * Russell Cattelan Digital Elves LLC + * Copyright (c) 2012 - 2015 + * EMC Corp / Isilon Systems Division All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include "assym.s" + +#define CR0_PG 0x80000000 /* PaGing enable */ + +#define X86_CR0_PE 0x00000001 /* Protection Enable */ +#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ +#define X86_CR0_EM 0x00000004 /* Emulation */ +#define X86_CR0_TS 0x00000008 /* Task Switched */ +#define X86_CR0_ET 0x00000010 /* Extension Type */ +#define X86_CR0_NE 0x00000020 /* Numeric Error */ +#define X86_CR0_WP 0x00010000 /* Write Protect */ +#define X86_CR0_AM 0x00040000 /* Alignment Mask */ +#define X86_CR0_NW 0x20000000 /* Not Write-through */ +#define X86_CR0_CD 0x40000000 /* Cache Disable */ +#define X86_CR0_PG 0x80000000 /* Paging */ + +#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ +#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ + + .globl relocate_kernel +relocate_kernel: + /* Install the new page table. */ + movq 32(%rcx), %rax /* page table */ + movq 40(%rcx), %r9 /* address of control_page with new PT */ + movq %rax, %cr3 + + /* + * Set cr4 to a known state: + * - page size extensions + * - physical address extension enabled + */ + movq $(X86_CR4_PSE | X86_CR4_PAE), %rax + movq %rax, %cr4 + + /* Move the stack to the end of control page. */ + lea 4096(%r9), %rsp + + /* now save stuff onto the new stack */ + pushq %rcx /* arg 4 control page */ + pushq %rdx /* arg 3 code page */ + pushq %rsi /* arg 2 kern base */ + pushq %rdi /* arg 1 va_list */ + + /* Zero out flags, and disable interrupts. 
*/ + pushq $0 + popfq + cli + + /* Install simple gdt. */ + movq 24(%r9), %rax /* gdt */ + lgdt (%rax) + movq 56(%r9), %rax + lidt (%rax) /* null idt */ + /* + * Move to the code page. + * code_page is based on new page table. + */ + movq %rdx, %r8 + addq $(identity_mapped - relocate_kernel), %r8 + /* Offset of code segment in new gdt. */ + pushq $0x08 + pushq %r8 + /* Jump to this spot in the new page. */ + lretq +identity_mapped: + + movq $0x10,%rax + movq %rax,%ds + movq %rax,%es + movq %rax,%fs + movq %rax,%gs + movq %rax,%ss + + /* + * Set cr0 to a known state: + * - Paging enabled + * - Alignment check disabled + * - Write protect disabled + * - No task switch + * - Don't do FP software emulation. + * - Protected mode enabled + */ + movq %cr0, %rax + andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | X86_CR0_NE), %rax + orl $(X86_CR0_PG | X86_CR0_PE), %eax + movq %rax, %cr0 + + /* + * The main loop that copies the kernel from the + * temporary pages to the final kernel destination. + */ + cld + /* Saved list of source pages. */ + movq 0(%rsp), %rbx + /* + * This is the initial dest page. + * @ KERNBASE + 0x200000 + * Kernel is contiguous in memory. + */ + movq 8(%rsp), %rdi +0: /* Top -- read another word for the indirection page. */ + movq (%rbx), %rcx + + addq $8, %rbx + testq $0x1, %rcx /* Is it a destination page? */ + jz 1f + movq %rcx, %rdi + andq $0xFFFFFFFFfffff000, %rdi + jmp 0b +1: + testq $0x2, %rcx /* Is it an indirection page? */ + jz 1f + movq %rcx, %rbx + andq $0xFFFFFFFFfffff000, %rbx + jmp 0b +1: + testq $0x4, %rcx /* Is it the done indicator? */ + jz 1f + jmp 2f +1: + testq $0x8, %rcx /* Is it the source indicator? */ + jz 0b /* Ignore it otherwise. */ + movq %rcx, %rsi /* For every source page do a copy. */ + andq $0xfffffffffffff000, %rsi + movq $512, %rcx + rep + movsq + jmp 0b +2: + /* + * Set all of the registers to known values. + * Leave %rsp alone. 
 + */ + xorq %rax, %rax + xorq %rbx, %rbx + xorq %rcx, %rcx + xorq %rdx, %rdx + xorq %rsi, %rsi + xorq %rdi, %rdi + xorq %rbp, %rbp + + pushq 16(%r9) /* physfree */ + movq 8(%r9), %rax /* modulep */ + salq $32, %rax /* modulep is carried in the upper 32 bits */ + pushq %rax + + pushq $0x8 /* code segment selector in the new GDT */ + pushq 48(%r9) /* entry # kernel entry pt */ + lretq +relocate_kernel_end: + .globl relocate_kernel_size +relocate_kernel_size: + .long relocate_kernel_end - relocate_kernel Index: sys/kern/kern_kload.c =================================================================== --- /dev/null +++ sys/kern/kern_kload.c @@ -0,0 +1,489 @@ +/* + * Copyright (c) 2011 - 2015 + * Russell Cattelan Digital Elves LLC + * Copyright (c) 2012 - 2015 + * EMC Corp / Isilon Systems Division All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +//#include +//#include +//#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +static struct kload_items *k_items = NULL; +static MALLOC_DEFINE(M_KLOAD, "kload_items", "kload items"); +static int kload_ready = 0; +static int kload_prealloc = 0; +TUNABLE_INT("kern.kload_prealloc", &kload_prealloc); + +static vm_offset_t kload_image_va = 0; +/* + * Warning this is somewhat arbitrary, but should go + * away once the allocate delays in kmem_alloc_attr are + * fixed. 
+ */ +#define IMAGE_PREALLOC_MAX (48 * 1024 * 1024) + +static void kload_init(void); +SYSINIT(kload_mem, SI_SUB_DRIVERS, SI_ORDER_ANY, kload_init, NULL); + +static int kload_copyin_segment(struct kload_segment *, int); +static int kload_add_page(struct kload_items *, unsigned long); +static void kload_shutdown_final(void *, int); +static struct region_descriptor *mygdt; +static vm_offset_t control_page; +static vm_offset_t code_page; +static void *gdt_desc; +pt_entry_t *kload_pgtbl = NULL; /* used as a test */ +static unsigned long max_addr = 0 , min_addr = 0; + +#define GIGMASK (~((1<<30)-1)) +#define ONEGIG (1<<30) +#define GUEST_GDTR_LIMIT (3 * 8 - 1) + +extern char kernphys[]; +#define KLOADBASE KERNBASE + +static void +update_max_min(vm_offset_t addr, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (vtophys(addr + (i * PAGE_SIZE)) < min_addr) + min_addr = vtophys(addr + (i * PAGE_SIZE)); + if (vtophys(addr + (i * PAGE_SIZE)) > max_addr) + max_addr = vtophys(addr + (i * PAGE_SIZE)); + } +} + +static vm_offset_t +kload_kmem_alloc(vm_size_t size) +{ + vm_offset_t va; + int num_pages; + + va = kmem_alloc_attr(kernel_arena, size, + M_WAITOK | M_ZERO, + 0, (1 << 30) /* 1Gig limit */, + VM_MEMATTR_WRITE_COMBINING); + + if (va) { + num_pages = roundup2(size,PAGE_SIZE) >> PAGE_SHIFT; + update_max_min(va, num_pages); + } + + return (va); +} + +struct kload_cpage { + unsigned long kcp_magic; /* 0 */ + unsigned long kcp_modulep; /* 1 */ + unsigned long kcp_physfree; /* 2 */ + unsigned long kcp_gdt; /* 3 */ + unsigned long kcp_pgtbl; /* 4 */ + unsigned long kcp_cp; /* 5 */ + unsigned long kcp_entry_pt; /* 6 */ + unsigned long kcp_idt; /* 7 */ +} __packed; + +static int +kload_add_page(struct kload_items *items, unsigned long item_m) +{ + vm_paddr_t phys; + unsigned long va; + + if (*items->item != 0) { + printf(" item != 0 0x%lx\n", *items->item); + items->item++; + items->i_count--; + } + + if ((items->item == items->last_item) || (items->i_count == 0)) { + 
/* out of space in current page grab a new one */ + va = (unsigned long)kload_kmem_alloc(PAGE_SIZE); + if (items->head_va == 0) + items->head_va = va; + + phys = vtophys(va); + /* store the address of indrect page */ + *items->item = (unsigned long) + (vtophys(va) + KLOADBASE) | KLOAD_INDIRECT; + items->item = (unsigned long *)va; + /* ok now move to new page to start storing address */ + items->last_item = (unsigned long *)va + + ((PAGE_SIZE/sizeof(unsigned long)) - 1); + items->i_count = ((PAGE_SIZE/sizeof(unsigned long)) - 1); + } + *items->item = item_m; + items->item++; + items->i_count--; + + return (0); +} + +static void +kload_init(void) +{ + int size; + + if (kload_prealloc > 0) { + size = min((kload_prealloc * 1024 * 1024), IMAGE_PREALLOC_MAX); + kload_image_va = kload_kmem_alloc(size); + printf("%s: preallocated %dMB\n", __func__, kload_prealloc); + kload_prealloc = size; /* re-use for copy in check */ + } else { + printf("%s: has not preallocated temporary space\n", __func__); + } + +} + +int +kload_copyin_segment(struct kload_segment *khdr, int seg) +{ + int i; + int num_pages; + int error = 0; + vm_offset_t va = kload_image_va; + + num_pages = roundup2(khdr->k_memsz,PAGE_SIZE) >> PAGE_SHIFT; + + /* check to make sure the preallocate space is beg enough */ + if (va && ((num_pages * PAGE_SIZE) > kload_prealloc)) { + printf("%s size over prealloc size %d need %d\n", __func__, + kload_prealloc, num_pages * PAGE_SIZE); + kmem_free(kernel_arena, va, kload_prealloc); + va = 0; + } + + if (va == 0) { + va = kload_kmem_alloc(num_pages * PAGE_SIZE); + if (va == 0) + return (ENOMEM); + } + + /* need to set up a START dst page */ + for (i = 0; i < num_pages; i++) { + kload_add_page(k_items, + (vtophys(va + (i * PAGE_SIZE)) + KLOADBASE) | KLOAD_SOURCE); + } + printf("%s starting copyin... 
", __func__); + *k_items->item = KLOAD_DONE; + if ((error = copyin(khdr->k_buf, (void *)va, khdr->k_memsz)) != 0) + return (error); + printf("copied %d bytes to va %p done marker at %p\n", + (int)khdr->k_memsz, (void *)va, &k_items->item ); + + return (error); +} + +static int +sys_kload(struct thread *td, struct kload_args *uap) +{ + struct region_descriptor *null_idt; + struct kload_cpage *k_cpage; + struct kload kld; + int error = 0; + int i; + size_t bufsize = uap->buflen; + + error = priv_check(td, PRIV_REBOOT); + if (error) + return (error); + + /* + * Hook into the shutdown/reboot path so + * we end up here before cpu reset. + */ + EVENTHANDLER_REGISTER(shutdown_final, kload_shutdown_final, + NULL, SHUTDOWN_PRI_KLOAD); + + max_addr = 0; + min_addr = ~0UL; + + if (bufsize != sizeof(struct kload)) { + printf("Hmm size not right %jd %jd\n", (uintmax_t)bufsize, + (uintmax_t)sizeof(struct kload)); + return (error); + } + if ((error = copyin(uap->kld, &kld, bufsize)) != 0) + return (error); + + if (k_items == NULL) { + if((k_items = malloc(sizeof(struct kload_items), + M_KLOAD, M_WAITOK | M_ZERO)) == NULL) + return (ENOMEM); + + k_items->head = 0; + k_items->head_va = 0; + k_items->item = &k_items->head; + k_items->last_item = &k_items->head; + } + + control_page = kload_kmem_alloc(PAGE_SIZE * 2); + if (control_page == 0) + return (ENOMEM); + k_cpage = (struct kload_cpage *)control_page; + code_page = control_page + PAGE_SIZE; + + printf("copy from %p kernel_kump to 0x%lx size %d\n", + relocate_kernel, (unsigned long)code_page, relocate_kernel_size); + memset((void *)control_page, 0, PAGE_SIZE * 2); + memcpy((void *)code_page, relocate_kernel, relocate_kernel_size); + + k_cpage->kcp_magic = 0xC0DE; + k_cpage->kcp_modulep = kld.k_modulep; + k_cpage->kcp_physfree = kld.k_physfree; + + mygdt = (struct region_descriptor *)kload_kmem_alloc(PAGE_SIZE); + if (mygdt == NULL) + return (ENOMEM); + k_cpage->kcp_gdt = (unsigned long)vtophys(mygdt) + KLOADBASE; + + gdt_desc = 
(char *)mygdt + sizeof(struct region_descriptor); + setup_freebsd_gdt(gdt_desc); + mygdt->rd_limit = GUEST_GDTR_LIMIT; + mygdt->rd_base = (unsigned long)(vtophys(gdt_desc) + KLOADBASE); + + /* + * we pass the virt addr of control_page but we need + * new virt addr as well + */ + k_cpage->kcp_cp = (unsigned long)(vtophys(control_page) + KLOADBASE); + k_cpage->kcp_entry_pt = kld.k_entry_pt; + + /* 10 segments should be more than enough */ + for (i = 0 ; (i < kld.num_hdrs && i <= 10); i++) { + error = kload_copyin_segment(&kld.khdr[i], i); + if (error != 0) + return (error); + } + + null_idt = (struct region_descriptor*) + kload_kmem_alloc(PAGE_SIZE); + if (null_idt == NULL) + return (ENOMEM); + k_cpage->kcp_idt = (unsigned long)vtophys(null_idt) + KLOADBASE; + /* Wipe the IDT. */ + null_idt->rd_limit = 0; + null_idt->rd_base = 0; + /* + * This must be built after all other allocations so it can + * build a page table entry based on min max addresses + */ + /* returns new page table phys addr */ + kload_pgtbl = kload_build_page_table(); + if (kload_pgtbl == NULL) + return (ENOMEM); + k_cpage->kcp_pgtbl = (unsigned long)kload_pgtbl; + + /* + * We could simply not install the handler and never + * hit kload_shutdown_final. But this way we can log + * the fact that we had a failed kload so allow the + * function to be called, but flagged not ready. 
 + */ + kload_ready = 1; + + if (bootverbose) + printf("%s:\n\t" + "head_va 0x%lx (phys 0x%lx)\n\t" + "kernbase 0x%lx\n\t" + "code_page 0x%lx (phys 0x%lx)\n\t" + "control_page 0x%lx (phys 0x%lx)\n\t" + "gdt 0x%lx (phys 0x%lx)\n\t" + "idt 0x%lx (phys 0x%lx)\n\t" + "k_entry_pt 0x%lx\n\t" + "pgtbl (phys 0x%lx)\n\t" + "max_addr (phys 0x%lx)\n\t" + "min_addr (phys 0x%lx)\n\t" + "modulep (phys 0x%lx)\n\t" + "physfree (phys 0x%lx)\n", + __func__, + (unsigned long)k_items->head_va, + (unsigned long)vtophys(k_items->head_va), + (unsigned long)(KERNBASE + (vm_paddr_t)kernphys), + (unsigned long)(control_page + PAGE_SIZE), + (unsigned long)vtophys(control_page + PAGE_SIZE), + (unsigned long)control_page, + (unsigned long)vtophys(control_page), + (unsigned long)mygdt,(unsigned long)vtophys(mygdt), + (unsigned long)null_idt,(unsigned long)vtophys(null_idt), + (unsigned long)kld.k_entry_pt, + (unsigned long)kload_pgtbl, + (unsigned long)max_addr, + (unsigned long)min_addr, + (unsigned long)kld.k_modulep, + (unsigned long)kld.k_physfree); + + if(!(uap->flags & (KLOAD_EXEC | KLOAD_REBOOT))) + goto just_load; +#if defined(SMP) + /* + * Bind us to CPU 0 so that all shutdown code runs there. Some + * systems don't shutdown properly (i.e., ACPI power off) if we + * run on another processor. + */ + printf("Binding process to cpu 0\n"); + thread_lock(curthread); + sched_bind(curthread, 0); + thread_unlock(curthread); + KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__)); +#endif + if(uap->flags & KLOAD_REBOOT) { + mtx_lock(&Giant); + kern_reboot(RB_KLOAD); + /* should not return */ + mtx_unlock(&Giant); + } + /* + * The reboot code will do a module shutdown so it is not + * part of kload_shutdown_final but it needs to happen. + * So in the case of the exec flag being passed run it here. 
 + */ + if (bootverbose) + printf("%s: module_shutdown\n", __func__); + kload_module_shutdown(); + kload_shutdown_final(NULL, RB_KLOAD); +just_load: + printf("%s: Kernel image loaded waiting for reboot\n", __func__); + return (0); +} + +/* + * shutdown_final eventhandler: suspend the APs, quiesce interrupts, then + * jump into the staged relocate_kernel trampoline. Only acts when the + * reboot was flagged RB_KLOAD and a kernel image has been loaded. + */ +static void +kload_shutdown_final(void *arg, int howto) +{ + int ret; + cpuset_t map; + + if (bootverbose) + printf("%s arg %p howto 0x%x\n", __func__, arg, howto); + + if ((howto & RB_KLOAD) == 0) { + printf("%s not a kload reboot\n", __func__); + return; + } + /* Just to make sure we are on cpu 0 */ + KASSERT(PCPU_GET(cpuid) == 0, ("%s: not running on cpu 0", __func__)); + if (kload_ready) { + printf("%s: suspend APs\n", __func__); + map = all_cpus; + /* we should be bound to cpu 0 at this point */ + printf("%s cpuid %d\n", __func__, PCPU_GET(cpuid)); + CPU_CLR(PCPU_GET(cpuid), &map); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) { + printf("cpu_reset: Stopping other CPUs\n"); + suspend_cpus(map); + } + + if (bootverbose) + printf("%s: clear all handlers\n", __func__); + intr_clear_all_handlers(); + + if (bootverbose) + printf("%s: loapic_clear_lapic\n", __func__); + lapic_clear_lapic(1); + + intr_suspend(); + + if (bootverbose) + printf("%s disable_interrupts cpuid %d\n", + __func__, PCPU_GET(cpuid)); + disable_intr(); + + printf("calling relocate_kernel\n"); + ret = relocate_kernel(vtophys(k_items->head_va) + KLOADBASE, + /* dest addr i.e. 
overwrite existing kernel */ + KERNBASE + (vm_paddr_t)kernphys, + vtophys(code_page) + KLOADBASE, + control_page); + /* currently this will never happen */ + printf("\trelocate_new_kernel returned %d\n",ret); + } else { + printf("kload_shutdown_final called without " + "a new kernel loaded\n"); + } +} + +static int +kload_modload(struct module *module, int cmd, void *arg) +{ + int error = 0; + + switch (cmd) { + case MOD_LOAD: + printf("%s MOD_LOAD\n", __func__); + break; + case MOD_UNLOAD: + printf("%s MOD_UNLOAD\n", __func__); + break; + case MOD_SHUTDOWN: + break; + default: + error = EINVAL; + break; + } + return error; +} + +static int offset = NO_SYSCALL; + +static struct sysent kload_sysent= { + 3, /* sy_narg */ + (sy_call_t *)sys_kload /* sy_call */ +}; + +SYSCALL_MODULE(kload, &offset, &kload_sysent, kload_modload, NULL); + +MODULE_VERSION(kload, 1); Index: sys/sys/kload.h =================================================================== --- sys/sys/kload.h +++ sys/sys/kload.h @@ -1,8 +1,8 @@ /* - * Copyright (c) 2011 - 2012 - * Russell Cattelan Digital Elves Inc - * Copyright (c) - * Isilon Systems, LLC. All rights reserved. + * Copyright (c) 2011 - 2015 + * Russell Cattelan Digital Elves LLC + * Copyright (c) 2012 - 2015 + * EMC Corp / Isilon Systems Division All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions