sys/amd64/amd64/pmap.c
@@ (1,404 lines elided) @@ allocpages(vm_paddr_t *firstaddr, int n)
 	u_int64_t ret;

 	ret = *firstaddr;
 	bzero((void *)ret, n * PAGE_SIZE);
 	*firstaddr += n * PAGE_SIZE;
 	return (ret);
 }
+static uint64_t
    kib: Why uint64_t and not vm_offset_t or u_long?
    markj: Just for consistency with allocpages() above.
+alloc2mpage(vm_paddr_t *firstaddr)
+{
+	uint64_t ret;
+
+	ret = roundup2(*firstaddr, NBPDR);
+	bzero((void *)ret, NBPDR);
+	*firstaddr = ret + NBPDR;
+	return (ret);
+}
 CTASSERT(powerof2(NDMPML4E));

 /* number of kernel PDP slots */
 #define	NKPDPE(ptpgs)	howmany(ptpgs, NPDEPG)

 static void
 nkpt_init(vm_paddr_t addr)
 {
@@ (229 lines elided) @@ create_pagetables(vm_paddr_t *firstaddr)
 	/* Connect the KVA slots up to the PML4 */
 	for (i = 0; i < NKPML4E; i++) {
 		p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
 		p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
 	}
 }
+static void
+bootstrap_pcpu(vm_paddr_t pcpupg, vm_paddr_t pdppg)
+{
+	struct region_descriptor r_gdt;
+	struct pcpu *oldpc, *pc;
+	void *dpcpu;
+	vm_offset_t va;
+	pdp_entry_t *pdpe;
+	pd_entry_t *pde;
+
+	/*
+	 * Map the bootstrap per-CPU region.
+	 */
+	va = VM_PCPU_BASE_START;
+	pdpe = pmap_pdpe(kernel_pmap, va);
+	if ((*pdpe & X86_PG_V) != 0)
+		panic("pdpe for %#lx is already valid", va);
+	*pdpe = pdppg | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
+	pde = pmap_pde(kernel_pmap, va);
+	pde_store(pde, pcpupg | X86_PG_V | X86_PG_PS | X86_PG_RW | X86_PG_A |
+	    X86_PG_M | pg_nx | pg_g);
+
    kib: I had to re-init cpuhead slist there, otherwise BSP appeared twice
         on it.  How do you handle that?
    markj: It is a bug in the patch, thanks.
+	/*
+	 * Re-initialize PCPU area for BSP after switching.
+	 * Make hardware use gdt and common_tss from the new PCPU.
+	 * Copy dynamic PCPU data following the PCPU structure.
+	 */
+	pc = (struct pcpu *)va;
+	oldpc = get_pcpu();
+	wrmsr(MSR_GSBASE, (uintptr_t)pc);
+	pcpu_init(pc, 0, sizeof(struct pcpu));
+	amd64_bsp_pcpu_init1(pc);
+	amd64_bsp_ist_init(pc);
+	pc->pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
+	    IOPERM_BITMAP_SIZE;
+	memcpy(pc->pc_gdt, oldpc->pc_gdt, NGDT *
+	    sizeof(struct user_segment_descriptor));
+	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&pc->pc_common_tss;
+	ssdtosyssd(&gdt_segs[GPROC0_SEL],
+	    (struct system_segment_descriptor *)&pc->pc_gdt[GPROC0_SEL]);
+	r_gdt.rd_limit = NGDT * sizeof(struct user_segment_descriptor) - 1;
+	r_gdt.rd_base = (long)pc->pc_gdt;
+	lgdt(&r_gdt);
+	wrmsr(MSR_GSBASE, (uintptr_t)pc);
+	ltr(GSEL(GPROC0_SEL, SEL_KPL));
+	pc->pc_acpi_id = oldpc->pc_acpi_id;
+	dpcpu = (void *)DPCPU_BASE(pc);
+	dpcpu_init(dpcpu, 0);
+	memcpy(dpcpu, (void *)DPCPU_BASE(oldpc), DPCPU_BYTES);
+}
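The bug kib points out above follows from pcpu_init() unconditionally linking its argument onto the global cpuhead list, so re-running it for the BSP leaves two entries for CPU 0; the pre-patch code sidestepped this by calling STAILQ_INIT(&cpuhead) just before the re-initialization. A minimal sketch of the list behavior, using the BSD <sys/queue.h> macros and a trimmed stand-in for struct pcpu rather than the real kernel structure:

#include <sys/queue.h>
#include <stdio.h>

/* Trimmed stand-in for struct pcpu; only the list linkage matters here. */
struct pcpu_stub {
	int cpuid;
	STAILQ_ENTRY(pcpu_stub) pc_allcpu;
};
static STAILQ_HEAD(, pcpu_stub) cpuhead = STAILQ_HEAD_INITIALIZER(cpuhead);

/* Mimics the relevant part of pcpu_init(): unconditional insertion. */
static void
stub_pcpu_init(struct pcpu_stub *pc, int cpuid)
{
	pc->cpuid = cpuid;
	STAILQ_INSERT_TAIL(&cpuhead, pc, pc_allcpu);
}

int
main(void)
{
	struct pcpu_stub temp_bsp, new_bsp, *pc;

	stub_pcpu_init(&temp_bsp, 0);	/* early-boot BSP pcpu */
	stub_pcpu_init(&new_bsp, 0);	/* re-init after switching: 2nd entry */
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)
		printf("cpu %d\n", pc->cpuid);	/* CPU 0 prints twice */
	return (0);
}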
 /*
  * Bootstrap the system enough to run with virtual memory.
  *
  * On amd64 this is called after mapping has already been enabled
  * and just syncs the pmap module with what has already been done.
  * [We can't call it easily with mapping off since the kernel is not
  * mapped with PA == VA, hence we would have to relocate every address
  * from the linked base (virtual) address "KERNBASE" to the actual
  * (physical) address starting relative to 0]
  */
 void
 pmap_bootstrap(vm_paddr_t *firstaddr)
 {
 	vm_offset_t va;
-	pt_entry_t *pte, *pcpu_pte;
-	struct region_descriptor r_gdt;
-	uint64_t cr4, pcpu_phys;
-	u_long res;
+	pt_entry_t *pte;
+	uint64_t cr4;
+	u_long res, pcpupg, pdppg;
 	int i;
 	KERNend = *firstaddr;
 	res = atop(KERNend - (vm_paddr_t)kernphys);

 	if (!pti)
 		pg_g = X86_PG_G;

 	/*
 	 * Create an initial set of page tables to run the kernel in.
 	 */
 	create_pagetables(firstaddr);
-	pcpu_phys = allocpages(firstaddr, MAXCPU);
 	/*
 	 * Add a physical memory segment (vm_phys_seg) corresponding to the
 	 * preallocated kernel page table pages so that vm_page structures
 	 * representing these pages will be created.  The vm_page structures
 	 * are required for promotion of the corresponding kernel virtual
 	 * addresses to superpage mappings.
 	 */
 	vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));

 	/*
 	 * Account for the virtual addresses mapped by create_pagetables().
 	 */
 	virtual_avail = (vm_offset_t)KERNBASE + round_2mpage(KERNend);
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
+	/*
+	 * Reserve physical memory to bootstrap the per-CPU allocator, as well
+	 * as a PD page used to map it into the kernel map.  Minimize the
+	 * amount of memory wasted to maintain alignment.
+	 */
+	if ((*firstaddr & PDRMASK) != 0) {
+		pdppg = allocpages(firstaddr, 1);
+		pcpupg = alloc2mpage(firstaddr);
+	} else {
+		pcpupg = alloc2mpage(firstaddr);
+		pdppg = allocpages(firstaddr, 1);
+	}
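The branch ordering deserves a worked example: when *firstaddr is unaligned, the 4 KB PD page is carved out of space that roundup2() would discard anyway, and when it is already aligned, taking the 2 MB page first avoids creating new slack. A small user-space sketch with illustrative addresses (constants redefined locally so it builds standalone):

#include <assert.h>
#include <stdint.h>

#define	PAGE_SIZE	(1ULL << 12)
#define	NBPDR		(1ULL << 21)
#define	PDRMASK		(NBPDR - 1)
#define	roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

int
main(void)
{
	uint64_t firstaddr, pdppg, pcpupg;

	/* Unaligned cursor: the PD page comes out of the round-up slack. */
	firstaddr = 0x5ff000;
	assert((firstaddr & PDRMASK) != 0);
	pdppg = firstaddr;			/* allocpages(firstaddr, 1) */
	firstaddr += PAGE_SIZE;			/* 0x600000, now aligned */
	pcpupg = roundup2(firstaddr, NBPDR);	/* alloc2mpage(): no waste */
	assert(pcpupg == 0x600000);

	/* Aligned cursor: take the 2MB page first to avoid realigning. */
	firstaddr = 0x600000;
	pcpupg = roundup2(firstaddr, NBPDR);	/* 0x600000, no slack */
	firstaddr = pcpupg + NBPDR;
	pdppg = firstaddr;			/* PD page follows directly */
	assert(pdppg == 0x800000);
	(void)pdppg;
	return (0);
}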
 	/*
 	 * Enable PG_G global pages, then switch to the kernel page
 	 * table from the bootstrap page table.  After the switch, it
 	 * is possible to enable SMEP and SMAP since PG_U bits are
 	 * correct now.
 	 */
 	cr4 = rcr4();
 	cr4 |= CR4_PGE;
 	load_cr4(cr4);
@@ (34 lines elided) @@ #define SYSMAP(c, p, v, n)
 	pte = vtopte(va);

 	/*
 	 * Crashdump maps.  The first page is reused as CMAP1 for the
 	 * memory test.
 	 */
 	SYSMAP(caddr_t, CMAP1, crashdumpmap, MAXDUMPPGS)
 	CADDR1 = crashdumpmap;
-	SYSMAP(struct pcpu *, pcpu_pte, __pcpu, MAXCPU);

 	virtual_avail = va;

-	for (i = 0; i < MAXCPU; i++) {
-		pcpu_pte[i] = (pcpu_phys + ptoa(i)) | X86_PG_V | X86_PG_RW |
-		    pg_g | pg_nx | X86_PG_M | X86_PG_A;
-	}
 	/*
-	 * Re-initialize PCPU area for BSP after switching.
-	 * Make hardware use gdt and common_tss from the new PCPU.
+	 * Bootstrap the per-CPU allocator.
 	 */
-	STAILQ_INIT(&cpuhead);
-	wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[0]);
-	pcpu_init(&__pcpu[0], 0, sizeof(struct pcpu));
-	amd64_bsp_pcpu_init1(&__pcpu[0]);
-	amd64_bsp_ist_init(&__pcpu[0]);
-	__pcpu[0].pc_common_tss.tss_iobase = sizeof(struct amd64tss) +
-	    IOPERM_BITMAP_SIZE;
-	memcpy(__pcpu[0].pc_gdt, temp_bsp_pcpu.pc_gdt, NGDT *
-	    sizeof(struct user_segment_descriptor));
-	gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&__pcpu[0].pc_common_tss;
-	ssdtosyssd(&gdt_segs[GPROC0_SEL],
-	    (struct system_segment_descriptor *)&__pcpu[0].pc_gdt[GPROC0_SEL]);
-	r_gdt.rd_limit = NGDT * sizeof(struct user_segment_descriptor) - 1;
-	r_gdt.rd_base = (long)__pcpu[0].pc_gdt;
-	lgdt(&r_gdt);
-	wrmsr(MSR_GSBASE, (uint64_t)&__pcpu[0]);
-	ltr(GSEL(GPROC0_SEL, SEL_KPL));
-	__pcpu[0].pc_dynamic = temp_bsp_pcpu.pc_dynamic;
-	__pcpu[0].pc_acpi_id = temp_bsp_pcpu.pc_acpi_id;
+	bootstrap_pcpu(pcpupg, pdppg);
 	/*
 	 * Initialize the PAT MSR.
 	 * pmap_init_pat() clears and sets CR4_PGE, which, as a
 	 * side-effect, invalidates stale PG_G TLB entries that might
 	 * have been created in our pre-boot environment.
 	 */
 	pmap_init_pat();
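The comment relies on a classic amd64 property: reloading CR3 leaves PG_G translations cached, while toggling CR4.PGE invalidates every TLB entry, global ones included. A hedged sketch of that idiom, written with the same rcr4()/load_cr4()/CR4_PGE helpers this file already uses; this is only an illustration of the mechanism, not the body of pmap_init_pat() itself:

/*
 * Sketch: force a full TLB flush, including PG_G entries, by
 * clearing and restoring CR4.PGE.  A plain CR3 reload would leave
 * global translations in place.
 */
static void
flush_global_tlb_sketch(void)
{
	uint64_t cr4;

	cr4 = rcr4();			/* read current CR4 */
	load_cr4(cr4 & ~CR4_PGE);	/* clearing PGE flushes all TLBs */
	load_cr4(cr4);			/* restore global-page support */
}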
@@ (2,302 lines elided) @@ pmap_page_array_startup(long pages)
 	pd_entry_t *pde, newpdir;
 	vm_offset_t va, start, end;
 	vm_paddr_t pa;
 	long pfn;
 	int domain, i;

 	vm_page_array_size = pages;

-	start = VM_MIN_KERNEL_ADDRESS;
+	start = VM_PAGE_ARRAY_START;
 	end = start + pages * sizeof(struct vm_page);
 	for (va = start; va < end; va += NBPDR) {
 		pfn = first_page + (va - start) / sizeof(struct vm_page);
 		domain = _vm_phys_domain(ptoa(pfn));
 		pdpe = pmap_pdpe(kernel_pmap, va);
 		if ((*pdpe & X86_PG_V) == 0) {
 			pa = vm_phys_early_alloc(domain, PAGE_SIZE);
 			dump_add_page(pa);
@@ (5,692 lines elided) @@ pmap_pti_free_page(vm_page_t m)
 	return (true);
 }

 static void
 pmap_pti_init(void)
 {
 	vm_page_t pml4_pg;
 	pdp_entry_t *pdpe;
+	struct pcpu *pc;
 	vm_offset_t va;
 	int i;

 	if (!pti)
 		return;
 	pti_obj = vm_pager_allocate(OBJT_PHYS, NULL, 0, VM_PROT_ALL, 0, NULL);
 	VM_OBJECT_WLOCK(pti_obj);
 	pml4_pg = pmap_pti_alloc_page();
 	pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg));
 	for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS &&
 	    va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) {
 		pdpe = pmap_pti_pdpe(va);
 		pmap_pti_wire_pte(pdpe);
 	}
-	pmap_pti_add_kva_locked((vm_offset_t)&__pcpu[0],
-	    (vm_offset_t)&__pcpu[0] + sizeof(__pcpu[0]) * MAXCPU, false);
 	pmap_pti_add_kva_locked((vm_offset_t)idt, (vm_offset_t)idt +
 	    sizeof(struct gate_descriptor) * NIDT, false);
 	CPU_FOREACH(i) {
+		pc = cpuid_to_pcpu[i];
+		pmap_pti_add_kva_locked((vm_offset_t)pc, (vm_offset_t)(pc + 1),
+		    false);
 		/* Doublefault stack IST 1 */
-		va = __pcpu[i].pc_common_tss.tss_ist1;
+		va = pc->pc_common_tss.tss_ist1;
 		pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
 		/* NMI stack IST 2 */
-		va = __pcpu[i].pc_common_tss.tss_ist2 + sizeof(struct nmi_pcpu);
+		va = pc->pc_common_tss.tss_ist2 + sizeof(struct nmi_pcpu);
 		pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
 		/* MC# stack IST 3 */
-		va = __pcpu[i].pc_common_tss.tss_ist3 +
+		va = pc->pc_common_tss.tss_ist3 +
 		    sizeof(struct nmi_pcpu);
 		pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
 		/* DB# stack IST 4 */
-		va = __pcpu[i].pc_common_tss.tss_ist4 + sizeof(struct nmi_pcpu);
+		va = pc->pc_common_tss.tss_ist4 + sizeof(struct nmi_pcpu);
 		pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false);
 	}
 	pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE,
 	    (vm_offset_t)etext, true);
 	pti_finalized = true;
 	VM_OBJECT_WUNLOCK(pti_obj);
 }
 SYSINIT(pmap_pti, SI_SUB_CPU + 1, SI_ORDER_ANY, pmap_pti_init, NULL);
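Each IST mapping in the loop exposes only [va - PAGE_SIZE, va): the TSS slot records a stack top and the stack grows down, so the page ending at va is the one the CPU pushes onto when it takes the exception. For IST 2-4 the code first adds back sizeof(struct nmi_pcpu), since those slots point just below a per-CPU scratch area kept at the top of the stack. A tiny sketch of the range computation, with a hypothetical address:

#include <assert.h>
#include <stdint.h>

#define	PAGE_SIZE	4096ULL

int
main(void)
{
	/* Hypothetical stack top as recorded in a TSS IST slot. */
	uint64_t ist_top = 0xfffffe0000401000ULL;
	/* Only the top page of the downward-growing stack is exposed. */
	uint64_t lo = ist_top - PAGE_SIZE, hi = ist_top;

	assert(hi - lo == PAGE_SIZE && (lo & (PAGE_SIZE - 1)) == 0);
	return (0);
}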
@@ (817 lines elided to end of file) @@