D31121.id92815.diff

diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1279,7 +1279,7 @@
* in real mode (e.g. SMP bare metal).
*/
#ifdef SMP
- mp_bootaddress(physmap, &physmap_idx);
+ alloc_ap_trampoline(physmap, &physmap_idx);
#endif
/* call pmap initialization to make new kernel address space */
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -105,6 +105,7 @@
static char *dbg_stack;
extern u_int mptramp_la57;
+extern u_int mptramp_nx;
/*
* Local data and functions.
@@ -112,86 +113,6 @@
static int start_ap(int apic_id);
-static bool
-is_kernel_paddr(vm_paddr_t pa)
-{
-
- return (pa >= trunc_2mpage(btext - KERNBASE) &&
- pa < round_page(_end - KERNBASE));
-}
-
-static bool
-is_mpboot_good(vm_paddr_t start, vm_paddr_t end)
-{
-
- return (start + AP_BOOTPT_SZ <= GiB(4) && atop(end) < Maxmem);
-}
-
-/*
- * Calculate usable address in base memory for AP trampoline code.
- */
-void
-mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
-{
- vm_paddr_t start, end;
- unsigned int i;
- bool allocated;
-
- alloc_ap_trampoline(physmap, physmap_idx);
-
- /*
- * Find a memory region big enough below the 4GB boundary to
- * store the initial page tables. Region must be mapped by
- * the direct map.
- *
- * Note that it needs to be aligned to a page boundary.
- */
- allocated = false;
- for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
- /*
- * First, try to chomp at the start of the physmap region.
- * Kernel binary might claim it already.
- */
- start = round_page(physmap[i]);
- end = start + AP_BOOTPT_SZ;
- if (start < end && end <= physmap[i + 1] &&
- is_mpboot_good(start, end) &&
- !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
- allocated = true;
- physmap[i] = end;
- break;
- }
-
- /*
- * Second, try to chomp at the end. Again, check
- * against kernel.
- */
- end = trunc_page(physmap[i + 1]);
- start = end - AP_BOOTPT_SZ;
- if (start < end && start >= physmap[i] &&
- is_mpboot_good(start, end) &&
- !is_kernel_paddr(start) && !is_kernel_paddr(end - 1)) {
- allocated = true;
- physmap[i + 1] = start;
- break;
- }
- }
- if (allocated) {
- mptramp_pagetables = start;
- if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
- memmove(&physmap[i], &physmap[i + 2],
- sizeof(*physmap) * (*physmap_idx - i + 2));
- *physmap_idx -= 2;
- }
- } else {
- mptramp_pagetables = trunc_page(boot_address) - AP_BOOTPT_SZ;
- if (bootverbose)
- printf(
-"Cannot find enough space for the initial AP page tables, placing them at %#x",
- mptramp_pagetables);
- }
-}
-
/*
* Initialize the IPI handlers and start up the AP's.
*/
@@ -243,6 +164,9 @@
assign_cpu_ids();
mptramp_la57 = la57;
+ mptramp_nx = pg_nx != 0;
+ MPASS(kernel_pmap->pm_cr3 < (1UL << 32));
+ mptramp_pagetables = kernel_pmap->pm_cr3;
/* Start each Application Processor */
start_all_aps();
@@ -399,55 +323,67 @@
int
start_all_aps(void)
{
- u_int64_t *pt5, *pt4, *pt3, *pt2;
+ vm_page_t m_pml4, m_pdp, m_pd[4];
+ pml5_entry_t old_pml45;
+ pml4_entry_t *v_pml4;
+ pdp_entry_t *v_pdp;
+ pd_entry_t *v_pd;
u_int32_t mpbioswarmvec;
- int apic_id, cpu, domain, i, xo;
+ int apic_id, cpu, domain, i;
u_char mpbiosreason;
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
- /* copy the AP 1st level boot code */
- bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);
-
- /* Locate the page tables, they'll be below the trampoline */
+ /* Create a transient 1:1 mapping of low 4G */
if (la57) {
- pt5 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables);
- xo = 1;
+ m_pml4 = pmap_page_alloc_below_4g(true);
+ v_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pml4));
} else {
- xo = 0;
+ v_pml4 = &kernel_pmap->pm_pmltop[0];
}
- pt4 = (uint64_t *)PHYS_TO_DMAP(mptramp_pagetables + xo * PAGE_SIZE);
- pt3 = pt4 + (PAGE_SIZE) / sizeof(u_int64_t);
- pt2 = pt3 + (PAGE_SIZE) / sizeof(u_int64_t);
-
- /* Create the initial 1GB replicated page tables */
- for (i = 0; i < 512; i++) {
- if (la57) {
- pt5[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
- PAGE_SIZE);
- pt5[i] |= PG_V | PG_RW | PG_U;
- }
-
- /*
- * Each slot of the level 4 pages points to the same
- * level 3 page.
- */
- pt4[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
- (xo + 1) * PAGE_SIZE);
- pt4[i] |= PG_V | PG_RW | PG_U;
-
- /*
- * Each slot of the level 3 pages points to the same
- * level 2 page.
- */
- pt3[i] = (u_int64_t)(uintptr_t)(mptramp_pagetables +
- ((xo + 2) * PAGE_SIZE));
- pt3[i] |= PG_V | PG_RW | PG_U;
-
- /* The level 2 page slots are mapped with 2MB pages for 1GB. */
- pt2[i] = i * (2 * 1024 * 1024);
- pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+ m_pdp = pmap_page_alloc_below_4g(true);
+ v_pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pdp));
+ m_pd[0] = pmap_page_alloc_below_4g(false);
+ v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[0]));
+ for (i = 0; i < NPDEPG; i++)
+ v_pd[i] = (i << PDRSHIFT) | X86_PG_V | X86_PG_RW | X86_PG_A |
+ X86_PG_M | PG_PS;
+ m_pd[1] = pmap_page_alloc_below_4g(false);
+ v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[1]));
+ for (i = 0; i < NPDEPG; i++)
+ v_pd[i] = (NBPDP + (i << PDRSHIFT)) | X86_PG_V | X86_PG_RW |
+ X86_PG_A | X86_PG_M | PG_PS;
+ m_pd[2] = pmap_page_alloc_below_4g(false);
+ v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[2]));
+ for (i = 0; i < NPDEPG; i++)
+ v_pd[i] = (2UL * NBPDP + (i << PDRSHIFT)) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS;
+ m_pd[3] = pmap_page_alloc_below_4g(false);
+ v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[3]));
+ for (i = 0; i < NPDEPG; i++)
+ v_pd[i] = (3UL * NBPDP + (i << PDRSHIFT)) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS;
+ v_pdp[0] = VM_PAGE_TO_PHYS(m_pd[0]) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M;
+ v_pdp[1] = VM_PAGE_TO_PHYS(m_pd[1]) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M;
+ v_pdp[2] = VM_PAGE_TO_PHYS(m_pd[2]) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M;
+ v_pdp[3] = VM_PAGE_TO_PHYS(m_pd[3]) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M;
+ old_pml45 = kernel_pmap->pm_pmltop[0];
+ if (la57) {
+ kernel_pmap->pm_pmltop[0] = VM_PAGE_TO_PHYS(m_pml4) |
+ X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
}
+ v_pml4[0] = VM_PAGE_TO_PHYS(m_pdp) | X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M;
+ pmap_invalidate_all(kernel_pmap);
+
+ /* copy the AP 1st level boot code */
+ bcopy(mptramp_start, (void *)PHYS_TO_DMAP(boot_address), bootMP_size);
+ if (bootverbose)
+ printf("AP boot address %#x\n", boot_address);
/* save the current value of the warm-start vector */
if (!efi_boot)
@@ -515,6 +451,17 @@
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
+ /* Destroy transient 1:1 mapping */
+ kernel_pmap->pm_pmltop[0] = old_pml45;
+ invlpg(0);
+ if (la57)
+ vm_page_free(m_pml4);
+ vm_page_free(m_pd[3]);
+ vm_page_free(m_pd[2]);
+ vm_page_free(m_pd[1]);
+ vm_page_free(m_pd[0]);
+ vm_page_free(m_pdp);
+
/* number of APs actually started */
return (mp_naps);
}
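
For context on the new start_all_aps(): the AP trampoline now loads the real
kernel page tables (kernel_pmap->pm_cr3) instead of dedicated replicated
tables, so the kernel pmap temporarily gains an identity map of the low
4 GiB, spliced into pm_pmltop[0] and torn down once the APs are up. The map
is one PDP page plus four PD pages of 2 MiB superpages: 4 x 512 x 2 MiB =
4 GiB. Below is a stand-alone sketch of that arithmetic and the PDE
encoding; the amd64 constants (PDRSHIFT, NPDEPG, NBPDP, X86_PG_*) are
redefined locally so it runs in user space, i.e. it is an illustration, not
kernel code.

#include <stdint.h>
#include <stdio.h>

#define	PDRSHIFT	21		/* 2 MiB superpage shift */
#define	NPDEPG		512		/* PDEs per page-directory page */
#define	NBPDP		(1ULL << 30)	/* bytes mapped by one PDP entry */

#define	X86_PG_V	0x001ULL
#define	X86_PG_RW	0x002ULL
#define	X86_PG_A	0x020ULL
#define	X86_PG_M	0x040ULL
#define	PG_PS		0x080ULL

int
main(void)
{
	uint64_t pde;
	int pd;

	/* 4 PD pages x 512 entries x 2 MiB = 4 GiB identity-mapped. */
	printf("mapped: %llu GiB\n",
	    (unsigned long long)((4ULL * NPDEPG << PDRSHIFT) >> 30));

	/* Reproduce the first PDE each of the four fill loops computes. */
	for (pd = 0; pd < 4; pd++) {
		pde = (pd * NBPDP) | X86_PG_V | X86_PG_RW | X86_PG_A |
		    X86_PG_M | PG_PS;
		printf("pd[%d][0] maps pa %#llx\n", pd,
		    (unsigned long long)(pde & ~((1ULL << PDRSHIFT) - 1)));
	}
	return (0);
}

This is also why the MPASS(kernel_pmap->pm_cr3 < (1UL << 32)) above is
needed: the trampoline is still executing 32-bit code when it loads %cr3,
so the kernel page-table root itself must sit below 4 GiB.
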
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -95,12 +95,25 @@
* is later enabled.
*/
mov %cr4, %eax
- orl $CR4_PAE, %eax
+ orl $(CR4_PAE | CR4_PGE), %eax
cmpb $0, mptramp_la57-mptramp_start(%ebx)
je 1f
orl $CR4_LA57, %eax
1: mov %eax, %cr4
+ /*
+ * If the BSP reported NXE support, enable EFER.NXE for all APs
+ * prior to loading %cr3. This avoids page faults if the AP
+ * encounters memory marked with the NX bit prior to detecting and
+ * enabling NXE support.
+ */
+ cmpb $0,mptramp_nx-mptramp_start(%ebx)
+ je 2f
+ movl $MSR_EFER, %ecx
+ rdmsr
+ orl $EFER_NXE, %eax
+ wrmsr
+2:
/*
* Enable EFER.LME so that we get long mode when all the prereqs are
* in place. In this case, it turns on when CR0_PG is finally enabled.
@@ -112,12 +125,13 @@
wrmsr
/*
- * Point to the embedded page tables for startup. Note that this
- * only gets accessed after we're actually in 64 bit mode, however
- * we can only set the bottom 32 bits of %cr3 in this state. This
- * means we are required to use a temporary page table that is below
- * the 4GB limit. %ebx is still our relocation base. We could just
- * subtract 3 * PAGE_SIZE, but that would be too easy.
+ * Load kernel page table pointer into %cr3.
+ * %ebx is still our relocation base.
+ *
+ * Note that this only gets accessed after we're actually in 64 bit
+ * mode, however we can only set the bottom 32 bits of %cr3 in this
+ * state. This means we depend on the kernel page table being
+ * allocated from the low 4G.
*/
leal mptramp_pagetables-mptramp_start(%ebx),%eax
movl (%eax), %eax
@@ -155,10 +169,8 @@
/*
* Yeehar! We're running in 64 bit mode! We can mostly ignore our
* segment registers, and get on with it.
- * Note that we are running at the correct virtual address, but with
- * a 1:1 1GB mirrored mapping over entire address space. We had better
- * switch to a real %cr3 promptly so that we can get to the direct map
- * space. Remember that jmp is relative and that we've been relocated,
+ * We are running in the correct virtual address space.
+ * Note that the jmp is relative and that we've been relocated,
* so use an indirect jump.
*/
.code64
@@ -220,6 +232,10 @@
mptramp_la57:
.long 0
+ .globl mptramp_nx
+mptramp_nx:
+ .long 0
+
/*
* The pseudo descriptor for lgdt to use.
*/
@@ -243,31 +259,5 @@
.code64
.p2align 4,0
entry_64:
- /*
- * If the BSP reported NXE support, enable EFER.NXE for all APs
- * prior to loading %cr3. This avoids page faults if the AP
- * encounters memory marked with the NX bit prior to detecting and
- * enabling NXE support.
- */
- movq pg_nx, %rbx
- testq %rbx, %rbx
- je 1f
- movl $MSR_EFER, %ecx
- rdmsr
- orl $EFER_NXE, %eax
- wrmsr
-
-1:
- /*
- * Load a real %cr3 that has all the direct map stuff and switches
- * off the 1GB replicated mirror. Load a stack pointer and jump
- * into AP startup code in C.
- */
- cmpl $0, la57
- jne 2f
- movq KPML4phys, %rax
- jmp 3f
-2: movq KPML5phys, %rax
-3: movq %rax, %cr3
movq bootSTK, %rsp
jmp init_secondary
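
The EFER.NXE enable moves from the 64-bit entry_64 epilogue (deleted above)
into the 32-bit trampoline path, before the %cr3 load, because the kernel
page tables being loaded may already carry NX bits. As a rough C rendering
of the same rdmsr/orl/wrmsr sequence, assuming GCC/Clang inline asm and a
ring-0 context (the trampoline of course does this directly in assembly):

#include <stdint.h>

#define	MSR_EFER	0xc0000080u	/* extended feature enable MSR */
#define	EFER_NXE	(1u << 11)	/* no-execute enable */

static inline uint64_t
rdmsr(uint32_t msr)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return ((uint64_t)hi << 32 | lo);
}

static inline void
wrmsr(uint32_t msr, uint64_t val)
{
	__asm__ __volatile__("wrmsr" : : "c" (msr), "a" ((uint32_t)val),
	    "d" ((uint32_t)(val >> 32)));
}

static void
enable_nxe(void)
{
	/*
	 * Must precede the %cr3 load: with NXE still clear, the first
	 * NX-marked page-table entry the AP walks would cause a
	 * reserved-bit page fault.
	 */
	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
}
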
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -39,7 +39,6 @@
void invlop_handler(void);
int start_all_aps(void);
-void mp_bootaddress(vm_paddr_t *, unsigned int *);
#endif /* !LOCORE */
#endif /* SMP */
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -1065,11 +1065,6 @@
}
#ifdef __amd64__
- /*
- * Enable global pages TLB extension
- * This also implicitly flushes the TLB
- */
- load_cr4(rcr4() | CR4_PGE);
if (pmap_pcid_enabled)
load_cr4(rcr4() | CR4_PCIDE);
load_ds(_udatasel);
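
The CR4.PGE enable removed here is not lost: the trampoline now sets it
together with CR4_PAE (the "orl $(CR4_PAE | CR4_PGE), %eax" hunk above), so
re-enabling it in the AP startup path became redundant. For illustration,
an inline-asm C equivalent of the removed read-modify-write, assuming the
semantics of FreeBSD's rcr4()/load_cr4():

#include <stdint.h>

#define	CR4_PGE	0x00000080UL	/* page global enable */

static inline uintptr_t
rcr4(void)
{
	uintptr_t cr4;

	__asm__ __volatile__("mov %%cr4, %0" : "=r" (cr4));
	return (cr4);
}

static inline void
load_cr4(uintptr_t cr4)
{
	__asm__ __volatile__("mov %0, %%cr4" : : "r" (cr4));
}

static void
enable_pge(void)
{
	/*
	 * Toggling CR4.PGE also flushes the entire TLB, which the
	 * removed code relied on as a side effect.
	 */
	load_cr4(rcr4() | CR4_PGE);
}
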
