Index: head/lib/libkvm/kvm_i386.h =================================================================== --- head/lib/libkvm/kvm_i386.h +++ head/lib/libkvm/kvm_i386.h @@ -67,14 +67,16 @@ _Static_assert(PAGE_SHIFT == I386_PAGE_SHIFT, "PAGE_SHIFT mismatch"); _Static_assert(PAGE_SIZE == I386_PAGE_SIZE, "PAGE_SIZE mismatch"); _Static_assert(PAGE_MASK == I386_PAGE_MASK, "PAGE_MASK mismatch"); +#if 0 _Static_assert(NPTEPG == I386_NPTEPG, "NPTEPG mismatch"); -_Static_assert(PDRSHIFT == I386_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(NBPDR == I386_NBPDR, "NBPDR mismatch"); +#endif +_Static_assert(PDRSHIFT_NOPAE == I386_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(PG_V == I386_PG_V, "PG_V mismatch"); _Static_assert(PG_PS == I386_PG_PS, "PG_PS mismatch"); -_Static_assert((u_int)PG_FRAME == I386_PG_FRAME, "PG_FRAME mismatch"); -_Static_assert(PG_PS_FRAME == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); +_Static_assert((u_int)PG_FRAME_NOPAE == I386_PG_FRAME, "PG_FRAME mismatch"); +_Static_assert(PG_PS_FRAME_NOPAE == I386_PG_PS_FRAME, "PG_PS_FRAME mismatch"); #endif int _i386_native(kvm_t *); Index: head/sys/conf/files.i386 =================================================================== --- head/sys/conf/files.i386 +++ head/sys/conf/files.i386 @@ -492,12 +492,16 @@ i386/i386/machdep.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard +i386/i386/minidump_machdep_pae.c standard +i386/i386/minidump_machdep_nopae.c standard i386/i386/mp_clock.c optional smp i386/i386/mp_machdep.c optional smp i386/i386/mpboot.s optional smp i386/i386/npx.c standard i386/i386/perfmon.c optional perfmon -i386/i386/pmap.c standard +i386/i386/pmap_base.c standard +i386/i386/pmap_nopae.c standard +i386/i386/pmap_pae.c standard i386/i386/prof_machdep.c optional profiling-routine i386/i386/ptrace_machdep.c standard i386/i386/sigtramp.s standard Index: head/sys/conf/options.i386 =================================================================== --- head/sys/conf/options.i386 +++ head/sys/conf/options.i386 @@ -33,11 +33,6 @@ # Physical address extensions and support for >4G ram. As above. PAE opt_global.h -# Use PAE page tables, but limit memory support to 4GB. -# This keeps the i386 non-PAE KBI, in particular, drivers see -# 32bit vm_paddr_t. -PAE_TABLES opt_global.h - TIMER_FREQ opt_clock.h CPU_ATHLON_SSE_HACK opt_cpu.h Index: head/sys/dev/dcons/dcons_os.c =================================================================== --- head/sys/dev/dcons/dcons_os.c +++ head/sys/dev/dcons/dcons_os.c @@ -309,7 +309,7 @@ * Allow read/write access to dcons buffer. 
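 *
 * Note on the change just below: with the PAE and non-PAE pmaps now
 * selected at boot time instead of at compile time, the PTE width is no
 * longer a constant that driver code can rely on, so the direct
 * "*vtopte(PMAP_MAP_LOW + pa) |= PG_RW" is replaced by the new
 * pmap_ksetrw() accessor.  A minimal sketch of such an accessor,
 * assuming the function-pointer dispatch provided by pmap_base.c (the
 * table and field names are illustrative; they are not shown in this hunk):
 *
 *	void
 *	pmap_ksetrw(vm_offset_t va)
 *	{
 *
 *		pmap_methods_ptr->pm_ksetrw(va);
 *	}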
*/ for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE) - *vtopte(PMAP_MAP_LOW + pa) |= PG_RW; + pmap_ksetrw(PMAP_MAP_LOW + pa); invltlb(); #endif /* XXX P to V */ Index: head/sys/dev/fb/fb.c =================================================================== --- head/sys/dev/fb/fb.c +++ head/sys/dev/fb/fb.c @@ -513,7 +513,7 @@ } int genfbmmap(genfb_softc_t *sc, video_adapter_t *adp, vm_ooffset_t offset, - vm_offset_t *paddr, int prot, vm_memattr_t *memattr) + vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { return vidd_mmap(adp, offset, paddr, prot, memattr); } Index: head/sys/dev/fb/fbreg.h =================================================================== --- head/sys/dev/fb/fbreg.h +++ head/sys/dev/fb/fbreg.h @@ -327,7 +327,7 @@ int genfbioctl(genfb_softc_t *sc, video_adapter_t *adp, u_long cmd, caddr_t arg, int flag, struct thread *td); int genfbmmap(genfb_softc_t *sc, video_adapter_t *adp, - vm_ooffset_t offset, vm_offset_t *paddr, + vm_ooffset_t offset, vm_paddr_t *paddr, int prot, vm_memattr_t *memattr); #endif /* FB_INSTALL_CDEV */ Index: head/sys/dev/fb/vga.c =================================================================== --- head/sys/dev/fb/vga.c +++ head/sys/dev/fb/vga.c @@ -147,7 +147,7 @@ int vga_mmap(struct cdev *dev, vga_softc_t *sc, vm_ooffset_t offset, - vm_offset_t *paddr, int prot, vm_memattr_t *memattr) + vm_paddr_t *paddr, int prot, vm_memattr_t *memattr) { return genfbmmap(&sc->gensc, sc->adp, offset, paddr, prot, memattr); } Index: head/sys/dev/fb/vgareg.h =================================================================== --- head/sys/dev/fb/vgareg.h +++ head/sys/dev/fb/vgareg.h @@ -91,7 +91,7 @@ int vga_ioctl(struct cdev *dev, vga_softc_t *sc, u_long cmd, caddr_t arg, int flag, struct thread *td); int vga_mmap(struct cdev *dev, vga_softc_t *sc, vm_ooffset_t offset, - vm_offset_t *paddr, int prot, vm_memattr_t *memattr); + vm_paddr_t *paddr, int prot, vm_memattr_t *memattr); #endif extern int (*vga_sub_configure)(int flags); Index: head/sys/dev/syscons/syscons.c =================================================================== --- head/sys/dev/syscons/syscons.c +++ head/sys/dev/syscons/syscons.c @@ -291,7 +291,7 @@ #ifdef __amd64__ fb = KERNBASE + 0xb8000; #else /* __i386__ */ - fb = PMAP_MAP_LOW + 0xb8000; + fb = pmap_get_map_low() + 0xb8000; #endif xsize = 80; ysize = 25; Index: head/sys/i386/acpica/acpi_machdep.c =================================================================== --- head/sys/i386/acpica/acpi_machdep.c +++ head/sys/i386/acpica/acpi_machdep.c @@ -134,7 +134,7 @@ off = pa & PAGE_MASK; length = round_page(length + off); - pa = pa & PG_FRAME; + pa = pmap_pg_frame(pa); va = (vm_offset_t)pmap_kenter_temporary(pa, offset) + (offset * PAGE_SIZE); data = (void *)(va + off); Index: head/sys/i386/i386/bios.c =================================================================== --- head/sys/i386/i386/bios.c +++ head/sys/i386/i386/bios.c @@ -329,9 +329,7 @@ va_list ap; int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; - pt_entry_t *pte; - pd_entry_t *ptd, orig_ptd; - + void *bios16_pmap_handle; arg_start = 0xffffffff; arg_end = 0; @@ -388,18 +386,10 @@ args->seg.args.limit = 0xffff; } - args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; + args->seg.code32.base = pmap_pg_frame((u_int)&bios16_jmp); args->seg.code32.limit = 0xffff; - /* - * no page table, so create one and install it. 
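 *
 * (The open-coded mapping below -- allocate a page-table page, point a
 * PDE at it, flush the TLB, and undo all of it after the call -- depends
 * on the compile-time page-table layout, so it moves behind an opaque
 * handle.  The resulting calling pattern, taken from the rest of this
 * hunk, is simply:
 *
 *	bios16_pmap_handle = pmap_bios16_enter();
 *	i = bios16_call(&args->r, stack_top);
 *	pmap_bios16_leave(bios16_pmap_handle);
 *
 * with the PAE and non-PAE pmaps each supplying their own handle
 * contents.)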
- */ - pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = IdlePTD; - *pte = vm86phystk | PG_RW | PG_V; - orig_ptd = *ptd; - *ptd = vtophys(pte) | PG_RW | PG_V; - pmap_invalidate_all(kernel_pmap); /* XXX insurance for now */ + bios16_pmap_handle = pmap_bios16_enter(); stack_top = stack; va_start(ap, fmt); @@ -451,13 +441,7 @@ bioscall_vector.vec16.segment = GSEL(GBIOSCODE16_SEL, SEL_KPL); i = bios16_call(&args->r, stack_top); - - *ptd = orig_ptd; /* remove page table */ - /* - * XXX only needs to be invlpg(0) but that doesn't work on the 386 - */ - pmap_invalidate_all(kernel_pmap); - free(pte, M_TEMP); /* ... and free it */ + pmap_bios16_leave(bios16_pmap_handle); return (i); } Index: head/sys/i386/i386/copyout.c =================================================================== --- head/sys/i386/i386/copyout.c +++ head/sys/i386/i386/copyout.c @@ -47,12 +47,6 @@ #include #include -#if defined(PAE) || defined(PAE_TABLES) -#define KCR3 ((u_int)IdlePDPT) -#else -#define KCR3 ((u_int)IdlePTD) -#endif - int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int); static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int); int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int); @@ -103,7 +97,6 @@ { struct pcpu *pc; vm_page_t m[2]; - pt_entry_t *pte; vm_offset_t kaddr; int error, i, plen; bool sleepable; @@ -128,12 +121,7 @@ sx_xlock(&pc->pc_copyout_slock); kaddr = pc->pc_copyout_saddr; } - for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) { - *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) | - pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(m[i]), - FALSE); - invlpg(kaddr + ptoa(i)); - } + pmap_cp_slow0_map(kaddr, plen, m); kaddr += uva - trunc_page(uva); f(kaddr, arg); sched_unpin(); @@ -225,7 +213,7 @@ (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) return (EFAULT); if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && - copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0)) + copyin_fast_tramp(udaddr, kaddr, len, pmap_get_kcr3()) == 0)) return (0); for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { @@ -260,7 +248,7 @@ (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) return (EFAULT); if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && - copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0)) + copyout_fast_tramp(kaddr, udaddr, len, pmap_get_kcr3()) == 0)) return (0); for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { @@ -296,7 +284,7 @@ (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { - res = fubyte_fast_tramp(base, KCR3); + res = fubyte_fast_tramp(base, pmap_get_kcr3()); if (res != -1) return (res); } @@ -322,7 +310,7 @@ (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { - res = fuword16_fast_tramp(base, KCR3); + res = fuword16_fast_tramp(base, pmap_get_kcr3()); if (res != -1) return (res); } @@ -348,7 +336,7 @@ (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS) return (-1); if (fast_copyout) { - if (fueword_fast_tramp(base, val, KCR3) == 0) + if (fueword_fast_tramp(base, val, pmap_get_kcr3()) == 0) return (0); } if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0, @@ -383,7 +371,7 @@ if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) return (-1); - if (fast_copyout && subyte_fast_tramp(base, byte, KCR3) == 0) + if 
(fast_copyout && subyte_fast_tramp(base, byte, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(u_char), true, subyte_slow0, &byte) != 0 ? -1 : 0); @@ -403,7 +391,8 @@ if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base || (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) return (-1); - if (fast_copyout && suword16_fast_tramp(base, word, KCR3) == 0) + if (fast_copyout && suword16_fast_tramp(base, word, pmap_get_kcr3()) + == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(int16_t), true, suword16_slow0, &word) != 0 ? -1 : 0); @@ -423,7 +412,7 @@ if ((uintptr_t)base + sizeof(word) < (uintptr_t)base || (uintptr_t)base + sizeof(word) > VM_MAXUSER_ADDRESS) return (-1); - if (fast_copyout && suword_fast_tramp(base, word, KCR3) == 0) + if (fast_copyout && suword_fast_tramp(base, word, pmap_get_kcr3()) == 0) return (0); return (cp_slow0((vm_offset_t)base, sizeof(long), true, suword_slow0, &word) != 0 ? -1 : 0); Index: head/sys/i386/i386/genassym.c =================================================================== --- head/sys/i386/i386/genassym.c +++ head/sys/i386/i386/genassym.c @@ -101,21 +101,8 @@ ASSYM(TD0_KSTACK_PAGES, TD0_KSTACK_PAGES); ASSYM(PAGE_SIZE, PAGE_SIZE); -ASSYM(NPTEPG, NPTEPG); -ASSYM(NPDEPG, NPDEPG); -ASSYM(NPDEPTD, NPDEPTD); -ASSYM(NPGPTD, NPGPTD); -ASSYM(PDESIZE, sizeof(pd_entry_t)); -ASSYM(PTESIZE, sizeof(pt_entry_t)); -ASSYM(PDESHIFT, PDESHIFT); -ASSYM(PTESHIFT, PTESHIFT); ASSYM(PAGE_SHIFT, PAGE_SHIFT); ASSYM(PAGE_MASK, PAGE_MASK); -ASSYM(PDRSHIFT, PDRSHIFT); -ASSYM(PDRMASK, PDRMASK); -ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); -ASSYM(KERNBASE, KERNBASE); -ASSYM(KERNLOAD, KERNLOAD); ASSYM(PCB_CR0, offsetof(struct pcb, pcb_cr0)); ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2)); ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3)); @@ -222,6 +209,9 @@ ASSYM(PC_TRAMPSTK, offsetof(struct pcpu, pc_trampstk)); ASSYM(PC_COPYOUT_BUF, offsetof(struct pcpu, pc_copyout_buf)); ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set)); +ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS); +ASSYM(KERNLOAD, KERNLOAD); +ASSYM(KERNBASE, KERNBASE); #ifdef DEV_APIC ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL); @@ -237,7 +227,6 @@ ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); ASSYM(VM86_STACK_SPACE, VM86_STACK_SPACE); -ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS); ASSYM(TRAMP_COPYOUT_SZ, TRAMP_COPYOUT_SZ); #ifdef HWPMC_HOOKS Index: head/sys/i386/i386/initcpu.c =================================================================== --- head/sys/i386/i386/initcpu.c +++ head/sys/i386/i386/initcpu.c @@ -632,6 +632,7 @@ void initializecpu(void) { + uint64_t msr; switch (cpu) { #ifdef I486_CPU @@ -744,16 +745,10 @@ load_cr4(rcr4() | CR4_FXSR | CR4_XMM); cpu_fxsr = hw_instruction_sse = 1; } -#if defined(PAE) || defined(PAE_TABLES) - if ((amd_feature & AMDID_NX) != 0) { - uint64_t msr; - + if (elf32_nxstack) { msr = rdmsr(MSR_EFER) | EFER_NXE; wrmsr(MSR_EFER, msr); - pg_nx = PG_NX; - elf32_nxstack = 1; } -#endif } void Index: head/sys/i386/i386/locore.s =================================================================== --- head/sys/i386/i386/locore.s +++ head/sys/i386/i386/locore.s @@ -54,15 +54,6 @@ #include "assym.inc" /* - * PTmap is recursive pagemap at top of virtual address space. - * Within PTmap, the page directory can be found (third indirection). 
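 *
 * These assembler symbols move into pmap.c as per-mode C macros later in
 * this change, since PDRSHIFT and PDESIZE differ between the PAE and
 * non-PAE layouts and can no longer be fixed at assembly time.  The
 * recursion itself is unchanged: the PTD slots starting at PTDPTDI point
 * back at the page-directory pages, so
 *
 *	vtopte(va)         == PTmap + (va >> PAGE_SHIFT)
 *	vtopte(vtopte(va)) == &PTD[va >> PDRSHIFT]
 *
 * i.e. applying the mapping twice yields the address of the PDE for va.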
- */ - .globl PTmap,PTD,PTDpde - .set PTmap,(PTDPTDI << PDRSHIFT) - .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) - .set PTDpde,PTD + (PTDPTDI * PDESIZE) - -/* * Compiled KERNBASE location and the kernel load address, now identical. */ .globl kernbase Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c +++ head/sys/i386/i386/machdep.c @@ -175,6 +175,8 @@ int _udatasel, _ucodesel; u_int basemem; +static int above4g_allow = 1; +static int above24g_allow = 0; int cold = 1; @@ -1675,6 +1677,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { + uint64_t lim, ign; int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; @@ -1682,13 +1685,24 @@ if (length == 0) return (1); -#ifndef PAE - if (base > 0xffffffff) { - printf("%uK of memory above 4GB ignored\n", - (u_int)(length / 1024)); + lim = 0x100000000; /* 4G */ + if (pae_mode && above4g_allow) + lim = above24g_allow ? -1ULL : 0x600000000; /* 24G */ + if (base >= lim) { + printf("%uK of memory above %uGB ignored, pae %d " + "above4g_allow %d above24g_allow %d\n", + (u_int)(length / 1024), (u_int)(lim >> 30), pae_mode, + above4g_allow, above24g_allow); return (1); } -#endif + if (base + length >= lim) { + ign = base + length - lim; + length -= ign; + printf("%uK of memory above %uGB ignored, pae %d " + "above4g_allow %d above24g_allow %d\n", + (u_int)(ign / 1024), (u_int)(lim >> 30), pae_mode, + above4g_allow, above24g_allow); + } /* * Find insertion point while checking for overlap. Start off by @@ -1781,8 +1795,6 @@ static void basemem_setup(void) { - pt_entry_t *pte; - int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", @@ -1790,15 +1802,7 @@ basemem = 640; } - /* - * Map pages between basemem and ISA_HOLE_START, if any, r/w into - * the vm86 page table so that vm86 can scribble on them using - * the vm86 map too. XXX: why 2 ways for this and only 1 way for - * page 0, at least as initialized here? - */ - pte = (pt_entry_t *)vm86paddr; - for (i = basemem / 4; i < 160; i++) - pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; + pmap_basemem_setup(basemem); } /* @@ -1820,7 +1824,6 @@ int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; - pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; @@ -1841,6 +1844,9 @@ */ vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first)); + TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow); + TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow); + /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. 
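 *
 * (A worked example of the clamping added to add_physmap_entry() above,
 * using the defaults set in this file: with pae_mode set,
 * hw.above4g_allow left at 1 and hw.above24g_allow left at 0, lim is
 * 0x600000000 (24 GB).  A 1 GB SMAP range starting at 0x5e0000000 is
 * accepted but trimmed to end at 0x600000000, and the remaining 512 MB
 * is reported as ignored; a range starting at or above 0x600000000 is
 * dropped entirely.)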
@@ -2031,7 +2037,6 @@ phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; - pte = CMAP3; /* * Get dcons buffer address @@ -2052,7 +2057,7 @@ end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; - int *ptr = (int *)CADDR3; + int *ptr; full = FALSE; /* @@ -2076,8 +2081,7 @@ /* * map page into kernel: valid, read/write,non-cacheable */ - *pte = pa | PG_V | PG_RW | PG_N; - invltlb(); + ptr = (int *)pmap_cmap3(pa, PG_V | PG_RW | PG_N); tmp = *(int *)ptr; /* @@ -2158,8 +2162,7 @@ break; } } - *pte = 0; - invltlb(); + pmap_cmap3(0, 0); /* * XXX @@ -2414,6 +2417,7 @@ finishidentcpu(); /* Final stage of CPU initialization */ i386_setidt2(); + pmap_set_nx(); initializecpu(); /* Initialize CPU registers */ initializecpucache(); @@ -2508,11 +2512,7 @@ /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; -#if defined(PAE) || defined(PAE_TABLES) - thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; -#else - thread0.td_pcb->pcb_cr3 = (int)IdlePTD; -#endif + thread0.td_pcb->pcb_cr3 = pmap_get_kcr3(); thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; @@ -2581,11 +2581,7 @@ (int)dblfault_stack + PAGE_SIZE; dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 = dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); -#if defined(PAE) || defined(PAE_TABLES) - dblfault_tss->tss_cr3 = (int)IdlePDPT; -#else - dblfault_tss->tss_cr3 = (int)IdlePTD; -#endif + dblfault_tss->tss_cr3 = pmap_get_kcr3(); dblfault_tss->tss_eip = (int)dblfault_handler; dblfault_tss->tss_eflags = PSL_KERNEL; dblfault_tss->tss_ds = dblfault_tss->tss_es = Index: head/sys/i386/i386/mem.c =================================================================== --- head/sys/i386/i386/mem.c +++ head/sys/i386/i386/mem.c @@ -148,7 +148,6 @@ error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio); pmap_qremove((vm_offset_t)ptvmmap, 1); sx_xunlock(&memsxlock); - } return (error); Index: head/sys/i386/i386/minidump_machdep.c =================================================================== --- head/sys/i386/i386/minidump_machdep.c +++ head/sys/i386/i386/minidump_machdep.c @@ -49,310 +49,11 @@ CTASSERT(sizeof(struct kerneldumpheader) == 512); -#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) -#define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) - uint32_t *vm_page_dump; int vm_page_dump_size; -static struct kerneldumpheader kdh; - -/* Handle chunked writes. 
*/ -static size_t fragsz; -static void *dump_va; -static uint64_t counter, progress; - CTASSERT(sizeof(*vm_page_dump) == 4); - -static int -is_dumpable(vm_paddr_t pa) -{ - int i; - - for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { - if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) - return (1); - } - return (0); -} - -#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) - -static int -blk_flush(struct dumperinfo *di) -{ - int error; - - if (fragsz == 0) - return (0); - - error = dump_append(di, dump_va, 0, fragsz); - fragsz = 0; - return (error); -} - -static int -blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) -{ - size_t len; - int error, i, c; - u_int maxdumpsz; - - maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); - if (maxdumpsz == 0) /* seatbelt */ - maxdumpsz = PAGE_SIZE; - error = 0; - if ((sz % PAGE_SIZE) != 0) { - printf("size not page aligned\n"); - return (EINVAL); - } - if (ptr != NULL && pa != 0) { - printf("cant have both va and pa!\n"); - return (EINVAL); - } - if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { - printf("address not page aligned\n"); - return (EINVAL); - } - if (ptr != NULL) { - /* If we're doing a virtual dump, flush any pre-existing pa pages */ - error = blk_flush(di); - if (error) - return (error); - } - while (sz) { - len = maxdumpsz - fragsz; - if (len > sz) - len = sz; - counter += len; - progress -= len; - if (counter >> 24) { - printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); - counter &= (1<<24) - 1; - } - - wdog_kern_pat(WD_LASTVAL); - - if (ptr) { - error = dump_append(di, ptr, 0, len); - if (error) - return (error); - ptr += len; - sz -= len; - } else { - for (i = 0; i < len; i += PAGE_SIZE) - dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); - fragsz += len; - pa += len; - sz -= len; - if (fragsz == maxdumpsz) { - error = blk_flush(di); - if (error) - return (error); - } - } - - /* Check for user abort. */ - c = cncheckc(); - if (c == 0x03) - return (ECANCELED); - if (c != -1) - printf(" (CTRL-C to abort) "); - } - - return (0); -} - -/* A fake page table page, to avoid having to handle both 4K and 2M pages */ -static pt_entry_t fakept[NPTEPG]; - -int -minidumpsys(struct dumperinfo *di) -{ - uint64_t dumpsize; - uint32_t ptesize; - vm_offset_t va; - int error; - uint32_t bits; - uint64_t pa; - pd_entry_t *pd; - pt_entry_t *pt; - int i, j, k, bit; - struct minidumphdr mdhdr; - - counter = 0; - /* Walk page table pages, set bits in vm_page_dump */ - ptesize = 0; - for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { - /* - * We always write a page, even if it is zero. Each - * page written corresponds to 2MB of space - */ - ptesize += PAGE_SIZE; - pd = IdlePTD; /* always mapped! */ - j = va >> PDRSHIFT; - if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { - /* This is an entire 2M page. */ - pa = pd[j] & PG_PS_FRAME; - for (k = 0; k < NPTEPG; k++) { - if (is_dumpable(pa)) - dump_add_page(pa); - pa += PAGE_SIZE; - } - continue; - } - if ((pd[j] & PG_V) == PG_V) { - /* set bit for each valid page in this 2MB block */ - pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); - for (k = 0; k < NPTEPG; k++) { - if ((pt[k] & PG_V) == PG_V) { - pa = pt[k] & PG_FRAME; - if (is_dumpable(pa)) - dump_add_page(pa); - } - } - } else { - /* nothing, we're going to dump a null page */ - } - } - - /* Calculate dump size. 
*/ - dumpsize = ptesize; - dumpsize += round_page(msgbufp->msg_size); - dumpsize += round_page(vm_page_dump_size); - for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { - bits = vm_page_dump[i]; - while (bits) { - bit = bsfl(bits); - pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; - /* Clear out undumpable pages now if needed */ - if (is_dumpable(pa)) { - dumpsize += PAGE_SIZE; - } else { - dump_drop_page(pa); - } - bits &= ~(1ul << bit); - } - } - dumpsize += PAGE_SIZE; - - progress = dumpsize; - - /* Initialize mdhdr */ - bzero(&mdhdr, sizeof(mdhdr)); - strcpy(mdhdr.magic, MINIDUMP_MAGIC); - mdhdr.version = MINIDUMP_VERSION; - mdhdr.msgbufsize = msgbufp->msg_size; - mdhdr.bitmapsize = vm_page_dump_size; - mdhdr.ptesize = ptesize; - mdhdr.kernbase = KERNBASE; -#if defined(PAE) || defined(PAE_TABLES) - mdhdr.paemode = 1; -#endif - - dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, - dumpsize); - - error = dump_start(di, &kdh); - if (error != 0) - goto fail; - - printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); - printf("Dumping %llu MB:", (long long)dumpsize >> 20); - - /* Dump my header */ - bzero(&fakept, sizeof(fakept)); - bcopy(&mdhdr, &fakept, sizeof(mdhdr)); - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - - /* Dump msgbuf up front */ - error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); - if (error) - goto fail; - - /* Dump bitmap */ - error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); - if (error) - goto fail; - - /* Dump kernel page table pages */ - for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { - /* We always write a page, even if it is zero */ - pd = IdlePTD; /* always mapped! */ - j = va >> PDRSHIFT; - if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { - /* This is a single 2M block. Generate a fake PTP */ - pa = pd[j] & PG_PS_FRAME; - for (k = 0; k < NPTEPG; k++) { - fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; - } - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - /* flush, in case we reuse fakept in the same block */ - error = blk_flush(di); - if (error) - goto fail; - continue; - } - if ((pd[j] & PG_V) == PG_V) { - pa = pd[j] & PG_FRAME; - error = blk_write(di, 0, pa, PAGE_SIZE); - if (error) - goto fail; - } else { - bzero(fakept, sizeof(fakept)); - error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); - if (error) - goto fail; - /* flush, in case we reuse fakept in the same block */ - error = blk_flush(di); - if (error) - goto fail; - } - } - - /* Dump memory chunks */ - /* XXX cluster it up and use blk_dump() */ - for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { - bits = vm_page_dump[i]; - while (bits) { - bit = bsfl(bits); - pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; - error = blk_write(di, 0, pa, PAGE_SIZE); - if (error) - goto fail; - bits &= ~(1ul << bit); - } - } - - error = blk_flush(di); - if (error) - goto fail; - - error = dump_finish(di, &kdh); - if (error != 0) - goto fail; - - printf("\nDump complete\n"); - return (0); - - fail: - if (error < 0) - error = -error; - - if (error == ECANCELED) - printf("\nDump aborted\n"); - else if (error == E2BIG || error == ENOSPC) - printf("\nDump failed. 
Partition too small.\n"); - else - printf("\n** DUMP FAILED (ERROR %d) **\n", error); - return (error); -} - void dump_add_page(vm_paddr_t pa) { @@ -373,5 +74,12 @@ idx = pa >> 5; /* 2^5 = 32 */ bit = pa & 31; atomic_clear_int(&vm_page_dump[idx], 1ul << bit); +} + +int +minidumpsys(struct dumperinfo *di) +{ + + return (pae_mode ? minidumpsys_pae(di) : minidumpsys_nopae(di)); } Index: head/sys/i386/i386/minidump_machdep_base.c =================================================================== --- head/sys/i386/i386/minidump_machdep_base.c +++ head/sys/i386/i386/minidump_machdep_base.c @@ -0,0 +1,360 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2006 Peter Wemm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_watchdog.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +CTASSERT(sizeof(struct kerneldumpheader) == 512); + +#define MD_ALIGN(x) (((off_t)(x) + PAGE_MASK) & ~PAGE_MASK) +#define DEV_ALIGN(x) roundup2((off_t)(x), DEV_BSIZE) + +extern uint32_t *vm_page_dump; +extern int vm_page_dump_size; + +static struct kerneldumpheader kdh; + +/* Handle chunked writes. 
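 *
 * blk_write() below accepts either a kernel-VA buffer (ptr != NULL),
 * which is appended to the dump directly, or a physical range (pa != 0),
 * which is staged page by page through pmap_kenter_temporary() and
 * written out by blk_flush() once fragsz reaches maxdumpsz.  Callers
 * finish with a final blk_flush() to push out any partial chunk.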
*/ +static size_t fragsz; +static void *dump_va; +static uint64_t counter, progress; + +CTASSERT(sizeof(*vm_page_dump) == 4); + + +static int +is_dumpable(vm_paddr_t pa) +{ + int i; + + for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { + if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) + return (1); + } + return (0); +} + +#define PG2MB(pgs) (((pgs) + (1 << 8) - 1) >> 8) + +static int +blk_flush(struct dumperinfo *di) +{ + int error; + + if (fragsz == 0) + return (0); + + error = dump_append(di, dump_va, 0, fragsz); + fragsz = 0; + return (error); +} + +static int +blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) +{ + size_t len; + int error, i, c; + u_int maxdumpsz; + + maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE); + if (maxdumpsz == 0) /* seatbelt */ + maxdumpsz = PAGE_SIZE; + error = 0; + if ((sz % PAGE_SIZE) != 0) { + printf("size not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL && pa != 0) { + printf("cant have both va and pa!\n"); + return (EINVAL); + } + if (pa != 0 && (((uintptr_t)ptr) % PAGE_SIZE) != 0) { + printf("address not page aligned\n"); + return (EINVAL); + } + if (ptr != NULL) { + /* If we're doing a virtual dump, flush any pre-existing pa pages */ + error = blk_flush(di); + if (error) + return (error); + } + while (sz) { + len = maxdumpsz - fragsz; + if (len > sz) + len = sz; + counter += len; + progress -= len; + if (counter >> 24) { + printf(" %lld", PG2MB(progress >> PAGE_SHIFT)); + counter &= (1<<24) - 1; + } + + wdog_kern_pat(WD_LASTVAL); + + if (ptr) { + error = dump_append(di, ptr, 0, len); + if (error) + return (error); + ptr += len; + sz -= len; + } else { + for (i = 0; i < len; i += PAGE_SIZE) + dump_va = pmap_kenter_temporary(pa + i, (i + fragsz) >> PAGE_SHIFT); + fragsz += len; + pa += len; + sz -= len; + if (fragsz == maxdumpsz) { + error = blk_flush(di); + if (error) + return (error); + } + } + + /* Check for user abort. */ + c = cncheckc(); + if (c == 0x03) + return (ECANCELED); + if (c != -1) + printf(" (CTRL-C to abort) "); + } + + return (0); +} + +/* A fake page table page, to avoid having to handle both 4K and 2M pages */ +static pt_entry_t fakept[NPTEPG]; + +#ifdef PMAP_PAE_COMP +#define minidumpsys minidumpsys_pae +#define IdlePTD IdlePTD_pae +#else +#define minidumpsys minidumpsys_nopae +#define IdlePTD IdlePTD_nopae +#endif + +int +minidumpsys(struct dumperinfo *di) +{ + uint64_t dumpsize; + uint32_t ptesize; + vm_offset_t va; + int error; + uint32_t bits; + uint64_t pa; + pd_entry_t *pd; + pt_entry_t *pt; + int i, j, k, bit; + struct minidumphdr mdhdr; + + counter = 0; + /* Walk page table pages, set bits in vm_page_dump */ + ptesize = 0; + for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { + /* + * We always write a page, even if it is zero. Each + * page written corresponds to 2MB of space + */ + ptesize += PAGE_SIZE; + pd = IdlePTD; /* always mapped! */ + j = va >> PDRSHIFT; + if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + /* This is an entire 2M page. */ + pa = pd[j] & PG_PS_FRAME; + for (k = 0; k < NPTEPG; k++) { + if (is_dumpable(pa)) + dump_add_page(pa); + pa += PAGE_SIZE; + } + continue; + } + if ((pd[j] & PG_V) == PG_V) { + /* set bit for each valid page in this 2MB block */ + pt = pmap_kenter_temporary(pd[j] & PG_FRAME, 0); + for (k = 0; k < NPTEPG; k++) { + if ((pt[k] & PG_V) == PG_V) { + pa = pt[k] & PG_FRAME; + if (is_dumpable(pa)) + dump_add_page(pa); + } + } + } else { + /* nothing, we're going to dump a null page */ + } + } + + /* Calculate dump size. 
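 *
 * Each set bit in vm_page_dump[] stands for one 4 KB page: word i,
 * bit b corresponds to the physical page at (i * 32 + b) * PAGE_SIZE.
 * For example, bit 5 of word 3 marks the page at
 * (3 * 32 + 5) * 4096 = 0x65000.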
*/ + dumpsize = ptesize; + dumpsize += round_page(msgbufp->msg_size); + dumpsize += round_page(vm_page_dump_size); + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = bsfl(bits); + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; + /* Clear out undumpable pages now if needed */ + if (is_dumpable(pa)) { + dumpsize += PAGE_SIZE; + } else { + dump_drop_page(pa); + } + bits &= ~(1ul << bit); + } + } + dumpsize += PAGE_SIZE; + + progress = dumpsize; + + /* Initialize mdhdr */ + bzero(&mdhdr, sizeof(mdhdr)); + strcpy(mdhdr.magic, MINIDUMP_MAGIC); + mdhdr.version = MINIDUMP_VERSION; + mdhdr.msgbufsize = msgbufp->msg_size; + mdhdr.bitmapsize = vm_page_dump_size; + mdhdr.ptesize = ptesize; + mdhdr.kernbase = KERNBASE; + mdhdr.paemode = pae_mode; + + dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_I386_VERSION, + dumpsize); + + error = dump_start(di, &kdh); + if (error != 0) + goto fail; + + printf("Physical memory: %ju MB\n", ptoa((uintmax_t)physmem) / 1048576); + printf("Dumping %llu MB:", (long long)dumpsize >> 20); + + /* Dump my header */ + bzero(&fakept, sizeof(fakept)); + bcopy(&mdhdr, &fakept, sizeof(mdhdr)); + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + + /* Dump msgbuf up front */ + error = blk_write(di, (char *)msgbufp->msg_ptr, 0, round_page(msgbufp->msg_size)); + if (error) + goto fail; + + /* Dump bitmap */ + error = blk_write(di, (char *)vm_page_dump, 0, round_page(vm_page_dump_size)); + if (error) + goto fail; + + /* Dump kernel page table pages */ + for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) { + /* We always write a page, even if it is zero */ + pd = IdlePTD; /* always mapped! */ + j = va >> PDRSHIFT; + if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) { + /* This is a single 2M block. Generate a fake PTP */ + pa = pd[j] & PG_PS_FRAME; + for (k = 0; k < NPTEPG; k++) { + fakept[k] = (pa + (k * PAGE_SIZE)) | PG_V | PG_RW | PG_A | PG_M; + } + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakept in the same block */ + error = blk_flush(di); + if (error) + goto fail; + continue; + } + if ((pd[j] & PG_V) == PG_V) { + pa = pd[j] & PG_FRAME; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + } else { + bzero(fakept, sizeof(fakept)); + error = blk_write(di, (char *)&fakept, 0, PAGE_SIZE); + if (error) + goto fail; + /* flush, in case we reuse fakept in the same block */ + error = blk_flush(di); + if (error) + goto fail; + } + } + + /* Dump memory chunks */ + /* XXX cluster it up and use blk_dump() */ + for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) { + bits = vm_page_dump[i]; + while (bits) { + bit = bsfl(bits); + pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) + bit) * PAGE_SIZE; + error = blk_write(di, 0, pa, PAGE_SIZE); + if (error) + goto fail; + bits &= ~(1ul << bit); + } + } + + error = blk_flush(di); + if (error) + goto fail; + + error = dump_finish(di, &kdh); + if (error != 0) + goto fail; + + printf("\nDump complete\n"); + return (0); + + fail: + if (error < 0) + error = -error; + + if (error == ECANCELED) + printf("\nDump aborted\n"); + else if (error == E2BIG || error == ENOSPC) + printf("\nDump failed. 
Partition too small.\n"); + else + printf("\n** DUMP FAILED (ERROR %d) **\n", error); + return (error); +} Index: head/sys/i386/i386/minidump_machdep_nopae.c =================================================================== --- head/sys/i386/i386/minidump_machdep_nopae.c +++ head/sys/i386/i386/minidump_machdep_nopae.c @@ -0,0 +1,40 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include "minidump_machdep_base.c" Index: head/sys/i386/i386/minidump_machdep_pae.c =================================================================== --- head/sys/i386/i386/minidump_machdep_pae.c +++ head/sys/i386/i386/minidump_machdep_pae.c @@ -0,0 +1,41 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#define PMAP_PAE_COMP +#include +#include +#include +#include +#include +#include "minidump_machdep_base.c" Index: head/sys/i386/i386/mp_machdep.c =================================================================== --- head/sys/i386/i386/mp_machdep.c +++ head/sys/i386/i386/mp_machdep.c @@ -309,9 +309,7 @@ mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - /* Remap lowest 1MB */ - IdlePTD[0] = IdlePTD[1]; - load_cr3(rcr3()); /* invalidate TLB */ + pmap_remap_lower(true); /* install the AP 1st level boot code */ install_ap_tramp(); @@ -359,9 +357,7 @@ CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } - /* Unmap lowest 1MB again */ - IdlePTD[0] = 0; - load_cr3(rcr3()); + pmap_remap_lower(false); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; Index: head/sys/i386/i386/mpboot.s =================================================================== --- head/sys/i386/i386/mpboot.s +++ head/sys/i386/i386/mpboot.s @@ -81,19 +81,17 @@ testl $CPUID_PSE,%edx jz 1f orl $CR4_PSE,%eax /* Enable PSE */ -1: - testl $CPUID_PGE,%edx - jz 1f +1: testl $CPUID_PGE,%edx + jz 2f orl $CR4_PGE,%eax /* Enable PGE */ -1: - testl $CPUID_VME,%edx - jz 1f +2: testl $CPUID_VME,%edx + jz 3f orl $CR4_VME,%eax /* Enable VME */ -1: - movl %eax,%cr4 +3: movl %eax,%cr4 /* Now enable paging mode */ -#if defined(PAE) || defined(PAE_TABLES) + cmpl $0, pae_mode + je 4f movl IdlePDPT, %eax movl %eax, %cr3 movl %cr4, %eax @@ -103,21 +101,19 @@ cpuid movl $0x80000001, %ebx cmpl %ebx, %eax - jb 1f + jb 5f movl %ebx, %eax cpuid testl $AMDID_NX, %edx - je 1f + je 5f movl $MSR_EFER, %ecx rdmsr orl $EFER_NXE,%eax wrmsr -1: -#else - movl IdlePTD, %eax + jmp 5f +4: movl IdlePTD_nopae, %eax movl %eax,%cr3 -#endif - movl %cr0,%eax +5: movl %cr0,%eax orl $CR0_PE|CR0_PG,%eax /* enable paging */ movl %eax,%cr0 /* let the games begin! */ movl bootSTK,%esp /* boot stack end loc. */ Index: head/sys/i386/i386/pmap.c =================================================================== --- head/sys/i386/i386/pmap.c +++ head/sys/i386/i386/pmap.c @@ -158,11 +158,8 @@ #ifdef SMP #include #endif +#include -#ifndef PMAP_SHPGPERPROC -#define PMAP_SHPGPERPROC 200 -#endif - #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline @@ -183,6 +180,26 @@ #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) /* + * PTmap is recursive pagemap at top of virtual address space. + * Within PTmap, the page directory can be found (third indirection). + */ +#define PTmap ((pt_entry_t *)(PTDPTDI << PDRSHIFT)) +#define PTD ((pd_entry_t *)((PTDPTDI << PDRSHIFT) + (PTDPTDI * PAGE_SIZE))) +#define PTDpde ((pd_entry_t *)((PTDPTDI << PDRSHIFT) + (PTDPTDI * PAGE_SIZE) + \ + (PTDPTDI * PDESIZE))) + +/* + * Translate a virtual address to the kernel virtual address of its page table + * entry (PTE). This can be used recursively. 
If the address of a PTE as + * previously returned by this macro is itself given as the argument, then the + * address of the page directory entry (PDE) that maps the PTE will be + * returned. + * + * This macro may be used before pmap_bootstrap() is called. + */ +#define vtopte(va) (PTmap + i386_btop(va)) + +/* * Get PDEs and PTEs for user/kernel address space */ #define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) @@ -198,30 +215,29 @@ atomic_clear_int((u_int *)(pte), PG_W)) #define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) -struct pmap kernel_pmap_store; +_Static_assert(sizeof(struct pmap) <= sizeof(struct pmap_KBI), + "pmap_KBI"); -vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ -vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ static int pgeflag = 0; /* PG_G or-in */ static int pseflag = 0; /* PG_PS or-in */ static int nkpt = NKPT; -vm_offset_t kernel_vm_end = /* 0 + */ NKPT * NBPDR; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pt_entry_t pg_nx; static uma_zone_t pdptzone; #endif -static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); +_Static_assert(VM_MAXUSER_ADDRESS == VADDR(TRPTDI, 0), "VM_MAXUSER_ADDRESS"); +_Static_assert(VM_MAX_KERNEL_ADDRESS <= VADDR(PTDPTDI, 0), + "VM_MAX_KERNEL_ADDRESS"); +_Static_assert(PMAP_MAP_LOW == VADDR(LOWPTDI, 0), "PMAP_MAP_LOW"); +_Static_assert(KERNLOAD == (KERNPTDI << PDRSHIFT), "KERNLOAD"); -static int pat_works = 1; -SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, - "Is page attribute table fully functional?"); +extern int pat_works; +extern int pg_ps_enabled; -static int pg_ps_enabled = 1; -SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, - &pg_ps_enabled, 0, "Are large page mappings enabled?"); +extern int elf32_nxstack; #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ @@ -244,21 +260,21 @@ * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); -static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; +extern int pv_entry_max, pv_entry_count; +static int pv_entry_high_water = 0; static struct md_page *pv_table; -static int shpgperproc = PMAP_SHPGPERPROC; +extern int shpgperproc; -struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ -int pv_maxchunks; /* How many chunks we have KVA for */ -vm_offset_t pv_vafree; /* freelist stored in the PTE */ +static struct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ +static int pv_maxchunks; /* How many chunks we have KVA for */ +static vm_offset_t pv_vafree; /* freelist stored in the PTE */ /* * All those kernel PT submaps that BSD is so fond of */ -pt_entry_t *CMAP3; +static pt_entry_t *CMAP3; static pd_entry_t *KPTD; -caddr_t ptvmmap = 0; -caddr_t CADDR3; +static caddr_t CADDR3; /* * Crashdump maps. 
@@ -269,23 +285,12 @@ static pt_entry_t *PADDR1 = NULL, *PADDR2, *PADDR3; #ifdef SMP static int PMAP1cpu, PMAP3cpu; -static int PMAP1changedcpu; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, - &PMAP1changedcpu, 0, - "Number of times pmap_pte_quick changed CPU with same PMAP1"); +extern int PMAP1changedcpu; #endif -static int PMAP1changed; -SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, - &PMAP1changed, 0, - "Number of times pmap_pte_quick changed PMAP1"); -static int PMAP1unchanged; -SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, - &PMAP1unchanged, 0, - "Number of times pmap_pte_quick didn't change PMAP1"); +extern int PMAP1changed; +extern int PMAP1unchanged; static struct mtx PMAP2mutex; -int pti; - /* * Internal flags for pmap_enter()'s helper functions. */ @@ -313,12 +318,7 @@ u_int flags, vm_page_t m); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); -static void pmap_flush_page(vm_page_t m); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); -static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, - vm_offset_t eva); -static void pmap_invalidate_cache_range_all(vm_offset_t sva, - vm_offset_t eva); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); @@ -358,30 +358,37 @@ static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait); #endif static void pmap_init_trm(void); +static void pmap_invalidate_all_int(pmap_t pmap); static __inline void pagezero(void *page); CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); -void pmap_cold(void); extern char _end[]; -u_long physfree; /* phys addr of next free page */ -u_long vm86phystk; /* PA of vm86/bios stack */ -u_long vm86paddr; /* address of vm86 region */ -int vm86pa; /* phys addr of vm86 region */ -u_long KERNend; /* phys addr end of kernel (just after bss) */ -pd_entry_t *IdlePTD; /* phys addr of kernel PTD */ -#if defined(PAE) || defined(PAE_TABLES) +extern u_long physfree; /* phys addr of next free page */ +extern u_long vm86phystk;/* PA of vm86/bios stack */ +extern u_long vm86paddr;/* address of vm86 region */ +extern int vm86pa; /* phys addr of vm86 region */ +extern u_long KERNend; /* phys addr end of kernel (just after bss) */ +#ifdef PMAP_PAE_COMP +pd_entry_t *IdlePTD_pae; /* phys addr of kernel PTD */ pdpt_entry_t *IdlePDPT; /* phys addr of kernel PDPT */ +pt_entry_t *KPTmap_pae; /* address of kernel page tables */ +#define IdlePTD IdlePTD_pae +#define KPTmap KPTmap_pae +#else +pd_entry_t *IdlePTD_nopae; +pt_entry_t *KPTmap_nopae; +#define IdlePTD IdlePTD_nopae +#define KPTmap KPTmap_nopae #endif -pt_entry_t *KPTmap; /* address of kernel page tables */ -u_long KPTphys; /* phys addr of kernel page tables */ +extern u_long KPTphys; /* phys addr of kernel page tables */ extern u_long tramp_idleptd; static u_long @@ -412,15 +419,26 @@ pmap_cold_map(pa, pa, cnt); } -_Static_assert(2 * NBPDR == KERNBASE, "Broken double-map of zero PTD"); +_Static_assert(LOWPTDI * 2 * NBPDR == KERNBASE, + "Broken double-map of zero PTD"); +static void +__CONCAT(PMTYPE, remap_lower)(bool enable) +{ + int i; + + for (i = 0; 
i < LOWPTDI; i++) + IdlePTD[i] = enable ? IdlePTD[LOWPTDI + i] : 0; + load_cr3(rcr3()); /* invalidate TLB */ +} + /* * Called from locore.s before paging is enabled. Sets up the first * kernel page table. Since kernel is mapped with PA == VA, this code * does not require relocations. */ void -pmap_cold(void) +__CONCAT(PMTYPE, cold)(void) { pt_entry_t *pt; u_long a; @@ -439,7 +457,7 @@ KPTmap = (pt_entry_t *)KPTphys; /* Allocate Page Table Directory */ -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP /* XXX only need 32 bytes (easier for now) */ IdlePDPT = (pdpt_entry_t *)allocpages(1, &physfree); #endif @@ -464,7 +482,7 @@ for (a = 0; a < NKPT; a++) IdlePTD[a] = (KPTphys + ptoa(a)) | PG_V | PG_RW | PG_A | PG_M; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP /* PAE install PTD pointers into PDPT */ for (a = 0; a < NPGPTD; a++) IdlePDPT[a] = ((u_int)IdlePTD + ptoa(a)) | PG_V; @@ -490,12 +508,12 @@ * access for various reasons. Kernel mappings never have any * access restrictions. */ - pmap_cold_mapident(0, atop(NBPDR)); - pmap_cold_map(0, NBPDR, atop(NBPDR)); + pmap_cold_mapident(0, atop(NBPDR) * LOWPTDI); + pmap_cold_map(0, NBPDR * LOWPTDI, atop(NBPDR) * LOWPTDI); pmap_cold_mapident(KERNBASE, atop(KERNend - KERNBASE)); /* Map page table directory */ -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pmap_cold_mapident((u_long)IdlePDPT, 1); #endif pmap_cold_mapident((u_long)IdlePTD, NPGPTD); @@ -537,14 +555,14 @@ pgeflag = PG_G; } ncr4 |= (cpu_feature & CPUID_VME) != 0 ? CR4_VME : 0; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP ncr4 |= CR4_PAE; #endif if (ncr4 != 0) load_cr4(rcr4() | ncr4); /* Now enable paging */ -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP cr3 = (u_int)IdlePDPT; #else cr3 = (u_int)IdlePTD; @@ -562,10 +580,33 @@ * Remove the lowest part of the double mapping of low memory * to get some null pointer checks. */ - IdlePTD[0] = 0; - load_cr3(cr3); /* invalidate TLB */ + __CONCAT(PMTYPE, remap_lower)(false); + + kernel_vm_end = /* 0 + */ NKPT * NBPDR; +#ifdef PMAP_PAE_COMP + i386_pmap_VM_NFREEORDER = VM_NFREEORDER_PAE; + i386_pmap_VM_LEVEL_0_ORDER = VM_LEVEL_0_ORDER_PAE; + i386_pmap_PDRSHIFT = PDRSHIFT_PAE; +#else + i386_pmap_VM_NFREEORDER = VM_NFREEORDER_NOPAE; + i386_pmap_VM_LEVEL_0_ORDER = VM_LEVEL_0_ORDER_NOPAE; + i386_pmap_PDRSHIFT = PDRSHIFT_NOPAE; +#endif } +static void +__CONCAT(PMTYPE, set_nx)(void) +{ + +#ifdef PMAP_PAE_COMP + if ((amd_feature & AMDID_NX) == 0) + return; + pg_nx = PG_NX; + elf32_nxstack = 1; + /* EFER.EFER_NXE is set in initializecpu(). */ +#endif +} + /* * Bootstrap the system enough to run with virtual memory. * @@ -573,8 +614,8 @@ * kernel page table and enabled paging, and just syncs the pmap * module with what has already been done. */ -void -pmap_bootstrap(vm_paddr_t firstaddr) +static void +__CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr) { vm_offset_t va; pt_entry_t *pte, *unused; @@ -611,7 +652,7 @@ */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pdir = IdlePTD; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP kernel_pmap->pm_pdpt = IdlePDPT; #endif CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ @@ -705,6 +746,13 @@ vm_offset_t pages; int i; +#ifdef PMAP_PAE_COMP + if (!pae_mode) + return; +#else + if (pae_mode) + return; +#endif CPU_FOREACH(i) { pc = pcpu_find(i); mtx_init(&pc->pc_copyout_mlock, "cpmlk", NULL, MTX_DEF | @@ -745,8 +793,8 @@ /* * Setup the PAT MSR. 
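 *
 * The pmap entry points in this file are renamed with
 * __CONCAT(PMTYPE, name) so that the same source can be built twice,
 * producing e.g. pmap_pae_init_pat() and pmap_nopae_init_pat(),
 * presumably via pmap_pae.c and pmap_nopae.c in the same way that
 * minidump_machdep_{pae,nopae}.c include minidump_machdep_base.c above.
 * A minimal sketch of the boot-time dispatch, assuming a method table
 * filled in by pmap_base.c (table and field names are illustrative):
 *
 *	void
 *	pmap_init_pat(void)
 *	{
 *
 *		pmap_methods_ptr->pm_init_pat();
 *	}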
*/ -void -pmap_init_pat(void) +static void +__CONCAT(PMTYPE, init_pat)(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; @@ -846,18 +894,7 @@ load_cr4(cr4); } -/* - * Initialize a vm_page's machine-dependent fields. - */ -void -pmap_page_init(vm_page_t m) -{ - - TAILQ_INIT(&m->md.pv_list); - m->md.pat_mode = PAT_WRITE_BACK; -} - -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP static void * pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, int wait) @@ -930,8 +967,8 @@ * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ -void -pmap_init(void) +static void +__CONCAT(PMTYPE, init)(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; @@ -1018,7 +1055,7 @@ if (pv_chunkbase == NULL) panic("pmap_init: not enough kvm for pv chunks"); pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, UMA_ZONE_VM | UMA_ZONE_NOFREE); @@ -1040,37 +1077,17 @@ } +extern u_long pmap_pde_demotions; +extern u_long pmap_pde_mappings; +extern u_long pmap_pde_p_failures; +extern u_long pmap_pde_promotions; -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, - "Max number of PV entries"); -SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, - "Page share factor per proc"); - -static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, - "2/4MB page mapping counters"); - -static u_long pmap_pde_demotions; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, - &pmap_pde_demotions, 0, "2/4MB page demotions"); - -static u_long pmap_pde_mappings; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, - &pmap_pde_mappings, 0, "2/4MB page mappings"); - -static u_long pmap_pde_p_failures; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, - &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); - -static u_long pmap_pde_promotions; -SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, - &pmap_pde_promotions, 0, "2/4MB page promotions"); - /*************************************************** * Low level helper routines..... ***************************************************/ -boolean_t -pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) +static boolean_t +__CONCAT(PMTYPE, is_valid_memattr)(pmap_t pmap __unused, vm_memattr_t mode) { return (mode >= 0 && mode < PAT_INDEX_SIZE && @@ -1081,8 +1098,8 @@ * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ -int -pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) +static int +__CONCAT(PMTYPE, cache_bits)(pmap_t pmap, int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; @@ -1106,8 +1123,8 @@ return (cache_bits); } -bool -pmap_ps_enabled(pmap_t pmap __unused) +static bool +__CONCAT(PMTYPE, ps_enabled)(pmap_t pmap __unused) { return (pg_ps_enabled); @@ -1147,14 +1164,6 @@ invltlb(); } -void -invltlb_glob(void) -{ - - invltlb(); -} - - #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. @@ -1175,8 +1184,8 @@ * immutable. The kernel page table is always active on every * processor. 
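 *
 * The invalidation helpers keep their bodies under internal
 * pmap_invalidate_*_int() names, which other code in this file calls
 * directly; the thin __CONCAT(PMTYPE, invalidate_page/range/all)
 * wrappers defined below are what each mode-specific build exports, so
 * callers outside the pmap reach the right copy through the shared
 * dispatch layer rather than through a compile-time symbol.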
*/ -void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +static void +pmap_invalidate_page_int(pmap_t pmap, vm_offset_t va) { cpuset_t *mask, other_cpus; u_int cpuid; @@ -1201,15 +1210,15 @@ /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) -void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +pmap_invalidate_range_int(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t *mask, other_cpus; vm_offset_t addr; u_int cpuid; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); return; } @@ -1231,8 +1240,8 @@ sched_unpin(); } -void -pmap_invalidate_all(pmap_t pmap) +static void +pmap_invalidate_all_int(pmap_t pmap) { cpuset_t *mask, other_cpus; u_int cpuid; @@ -1254,8 +1263,8 @@ sched_unpin(); } -void -pmap_invalidate_cache(void) +static void +__CONCAT(PMTYPE, invalidate_cache)(void) { sched_pin(); @@ -1351,16 +1360,16 @@ * Normal, non-SMP, 486+ invalidation functions. * We inline these within pmap.c for speed. */ -PMAP_INLINE void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +static void +pmap_invalidate_page_int(pmap_t pmap, vm_offset_t va) { if (pmap == kernel_pmap) invlpg(va); } -PMAP_INLINE void -pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +pmap_invalidate_range_int(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; @@ -1369,16 +1378,16 @@ invlpg(addr); } -PMAP_INLINE void -pmap_invalidate_all(pmap_t pmap) +static void +pmap_invalidate_all_int(pmap_t pmap) { if (pmap == kernel_pmap) invltlb(); } -PMAP_INLINE void -pmap_invalidate_cache(void) +static void +__CONCAT(PMTYPE, invalidate_cache)(void) { wbinvd(); @@ -1398,128 +1407,48 @@ #endif /* !SMP */ static void -pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde) +__CONCAT(PMTYPE, invalidate_page)(pmap_t pmap, vm_offset_t va) { - /* - * When the PDE has PG_PROMOTED set, the 2- or 4MB page mapping was - * created by a promotion that did not invalidate the 512 or 1024 4KB - * page mappings that might exist in the TLB. Consequently, at this - * point, the TLB may hold both 4KB and 2- or 4MB page mappings for - * the address range [va, va + NBPDR). Therefore, the entire range - * must be invalidated here. In contrast, when PG_PROMOTED is clear, - * the TLB will not hold any 4KB page mappings for the address range - * [va, va + NBPDR), and so a single INVLPG suffices to invalidate the - * 2- or 4MB page mapping from the TLB. 
- */ - if ((pde & PG_PROMOTED) != 0) - pmap_invalidate_range(pmap, va, va + NBPDR - 1); - else - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); } -DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t), - static) +static void +__CONCAT(PMTYPE, invalidate_range)(pmap_t pmap, vm_offset_t sva, + vm_offset_t eva) { - if ((cpu_feature & CPUID_SS) != 0) - return (pmap_invalidate_cache_range_selfsnoop); - if ((cpu_feature & CPUID_CLFSH) != 0) - return (pmap_force_invalidate_cache_range); - return (pmap_invalidate_cache_range_all); + pmap_invalidate_range_int(pmap, sva, eva); } -#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) - static void -pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva) +__CONCAT(PMTYPE, invalidate_all)(pmap_t pmap) { - KASSERT((sva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: sva not page-aligned")); - KASSERT((eva & PAGE_MASK) == 0, - ("pmap_invalidate_cache_range: eva not page-aligned")); + pmap_invalidate_all_int(pmap); } static void -pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) +pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde) { - pmap_invalidate_cache_range_check_align(sva, eva); -} - -void -pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) -{ - - sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); - if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { - /* - * The supplied range is bigger than 2MB. - * Globally invalidate cache. - */ - pmap_invalidate_cache(); - return; - } - -#ifdef DEV_APIC /* - * XXX: Some CPUs fault, hang, or trash the local APIC - * registers if we use CLFLUSH on the local APIC - * range. The local APIC is always uncached, so we - * don't need to flush for that range anyway. + * When the PDE has PG_PROMOTED set, the 2- or 4MB page mapping was + * created by a promotion that did not invalidate the 512 or 1024 4KB + * page mappings that might exist in the TLB. Consequently, at this + * point, the TLB may hold both 4KB and 2- or 4MB page mappings for + * the address range [va, va + NBPDR). Therefore, the entire range + * must be invalidated here. In contrast, when PG_PROMOTED is clear, + * the TLB will not hold any 4KB page mappings for the address range + * [va, va + NBPDR), and so a single INVLPG suffices to invalidate the + * 2- or 4MB page mapping from the TLB. */ - if (pmap_kextract(sva) == lapic_paddr) - return; -#endif - - if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { - /* - * Do per-cache line flush. Use the sfence - * instruction to insure that previous stores are - * included in the write-back. The processor - * propagates flush to other processors in the cache - * coherence domain. - */ - sfence(); - for (; sva < eva; sva += cpu_clflush_line_size) - clflushopt(sva); - sfence(); - } else { - /* - * Writes are ordered by CLFLUSH on Intel CPUs. 
- */ - if (cpu_vendor_id != CPU_VENDOR_INTEL) - mfence(); - for (; sva < eva; sva += cpu_clflush_line_size) - clflush(sva); - if (cpu_vendor_id != CPU_VENDOR_INTEL) - mfence(); - } + if ((pde & PG_PROMOTED) != 0) + pmap_invalidate_range_int(pmap, va, va + NBPDR - 1); + else + pmap_invalidate_page_int(pmap, va); } -static void -pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) -{ - - pmap_invalidate_cache_range_check_align(sva, eva); - pmap_invalidate_cache(); -} - -void -pmap_invalidate_cache_pages(vm_page_t *pages, int count) -{ - int i; - - if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || - (cpu_feature & CPUID_CLFSH) == 0) { - pmap_invalidate_cache(); - } else { - for (i = 0; i < count; i++) - pmap_flush_page(pages[i]); - } -} - /* * Are we current address space or kernel? */ @@ -1534,8 +1463,8 @@ * If the given pmap is not the current or kernel pmap, the returned pte must * be released by passing it to pmap_pte_release(). */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_offset_t va) +static pt_entry_t * +__CONCAT(PMTYPE, pte)(pmap_t pmap, vm_offset_t va) { pd_entry_t newpf; pd_entry_t *pde; @@ -1551,7 +1480,8 @@ newpf = *pde & PG_FRAME; if ((*PMAP2 & PG_FRAME) != newpf) { *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); + pmap_invalidate_page_int(kernel_pmap, + (vm_offset_t)PADDR2); } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } @@ -1686,13 +1616,40 @@ } /* + * Extract from the kernel page table the physical address that is mapped by + * the given virtual address "va". + * + * This function may be used before pmap_bootstrap() is called. + */ +static vm_paddr_t +__CONCAT(PMTYPE, kextract)(vm_offset_t va) +{ + vm_paddr_t pa; + + if ((pa = pte_load(&PTD[va >> PDRSHIFT])) & PG_PS) { + pa = (pa & PG_PS_FRAME) | (va & PDRMASK); + } else { + /* + * Beware of a concurrent promotion that changes the PDE at + * this point! For example, vtopte() must not be used to + * access the PTE because it would use the new PDE. It is, + * however, safe to use the old PDE because the page table + * page is preserved by the promotion. + */ + pa = KPTmap[i386_btop(va)]; + pa = (pa & PG_FRAME) | (va & PAGE_MASK); + } + return (pa); +} + +/* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ -vm_paddr_t -pmap_extract(pmap_t pmap, vm_offset_t va) +static vm_paddr_t +__CONCAT(PMTYPE, extract)(pmap_t pmap, vm_offset_t va) { vm_paddr_t rtval; pt_entry_t pte; @@ -1720,8 +1677,8 @@ * with the given pmap and virtual address pair * if that mapping permits the given protection. */ -vm_page_t -pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +static vm_page_t +__CONCAT(PMTYPE, extract_and_hold)(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde; pt_entry_t pte; @@ -1769,8 +1726,8 @@ * * This function may be used before pmap_bootstrap() is called. */ -PMAP_INLINE void -pmap_kenter(vm_offset_t va, vm_paddr_t pa) +static void +__CONCAT(PMTYPE, kenter)(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; @@ -1794,8 +1751,8 @@ * * This function may be used before pmap_bootstrap() is called. */ -PMAP_INLINE void -pmap_kremove(vm_offset_t va) +static void +__CONCAT(PMTYPE, kremove)(vm_offset_t va) { pt_entry_t *pte; @@ -1815,8 +1772,9 @@ * update '*virt' with the first usable address after the mapped * region. 
*/ -vm_offset_t -pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +static vm_offset_t +__CONCAT(PMTYPE, map)(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, + int prot) { vm_offset_t va, sva; vm_paddr_t superpage_offset; @@ -1854,7 +1812,7 @@ start += PAGE_SIZE; } } - pmap_invalidate_range(kernel_pmap, sva, va); + pmap_invalidate_range_int(kernel_pmap, sva, va); *virt = va; return (sva); } @@ -1869,8 +1827,8 @@ * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ -void -pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) +static void +__CONCAT(PMTYPE, qenter)(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; @@ -1884,7 +1842,7 @@ m->md.pat_mode, 0); if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { oldpte |= *pte; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pte_store(pte, pa | pg_nx | PG_RW | PG_V); #else pte_store(pte, pa | PG_RW | PG_V); @@ -1893,7 +1851,7 @@ pte++; } if (__predict_false((oldpte & PG_V) != 0)) - pmap_invalidate_range(kernel_pmap, sva, sva + count * + pmap_invalidate_range_int(kernel_pmap, sva, sva + count * PAGE_SIZE); } @@ -1902,8 +1860,8 @@ * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ -void -pmap_qremove(vm_offset_t sva, int count) +static void +__CONCAT(PMTYPE, qremove)(vm_offset_t sva, int count) { vm_offset_t va; @@ -1912,7 +1870,7 @@ pmap_kremove(va); va += PAGE_SIZE; } - pmap_invalidate_range(kernel_pmap, sva, va); + pmap_invalidate_range_int(kernel_pmap, sva, va); } /*************************************************** @@ -2022,13 +1980,13 @@ /* * Initialize the pmap for the swapper process. */ -void -pmap_pinit0(pmap_t pmap) +static void +__CONCAT(PMTYPE, pinit0)(pmap_t pmap) { PMAP_LOCK_INIT(pmap); pmap->pm_pdir = IdlePTD; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pmap->pm_pdpt = IdlePDPT; #endif pmap->pm_root.rt_root = 0; @@ -2042,8 +2000,8 @@ * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ -int -pmap_pinit(pmap_t pmap) +static int +__CONCAT(PMTYPE, pinit)(pmap_t pmap) { vm_page_t m; int i; @@ -2056,7 +2014,7 @@ pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD); if (pmap->pm_pdir == NULL) return (0); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); KASSERT(((vm_offset_t)pmap->pm_pdpt & ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, @@ -2072,18 +2030,13 @@ /* * allocate the page directory page(s) */ - for (i = 0; i < NPGPTD;) { + for (i = 0; i < NPGPTD; i++) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | - VM_ALLOC_WIRED | VM_ALLOC_ZERO); - if (m == NULL) { - vm_wait(NULL); - } else { - pmap->pm_ptdpg[i] = m; -#if defined(PAE) || defined(PAE_TABLES) - pmap->pm_pdpt[i] = VM_PAGE_TO_PHYS(m) | PG_V; + VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); + pmap->pm_ptdpg[i] = m; +#ifdef PMAP_PAE_COMP + pmap->pm_pdpt[i] = VM_PAGE_TO_PHYS(m) | PG_V; #endif - i++; - } } pmap_qenter((vm_offset_t)pmap->pm_pdir, pmap->pm_ptdpg, NPGPTD); @@ -2203,8 +2156,8 @@ * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
*/ -void -pmap_release(pmap_t pmap) +static void +__CONCAT(PMTYPE, release)(pmap_t pmap) { vm_page_t m; int i; @@ -2221,7 +2174,7 @@ for (i = 0; i < NPGPTD; i++) { m = pmap->pm_ptdpg[i]; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), ("pmap_release: got wrong ptd page")); #endif @@ -2230,31 +2183,11 @@ } } -static int -kvm_size(SYSCTL_HANDLER_ARGS) -{ - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; - - return (sysctl_handle_long(oidp, &ksize, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_size, "IU", "Size of KVM"); - -static int -kvm_free(SYSCTL_HANDLER_ARGS) -{ - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; - - return (sysctl_handle_long(oidp, &kfree, 0, req)); -} -SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, - 0, 0, kvm_free, "IU", "Amount of KVM free"); - /* * grow the number of kernel page table entries, if needed */ -void -pmap_growkernel(vm_offset_t addr) +static void +__CONCAT(PMTYPE, growkernel)(vm_offset_t addr) { vm_paddr_t ptppaddr; vm_page_t nkpg; @@ -2325,30 +2258,10 @@ PC_FREE0_9, PC_FREE10 }; -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, - "Current number of pv entries"); - #ifdef PV_STATS -static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; - -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, - "Current number of pv entry chunks"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, - "Current number of pv entry chunks allocated"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, - "Current number of pv entry chunks frees"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, - "Number of times tried to get a chunk page but failed."); - -static long pv_entry_frees, pv_entry_allocs; -static int pv_entry_spare; - -SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, - "Current number of pv entry frees"); -SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, - "Current number of pv entry allocs"); -SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, - "Current number of spare pv entries"); +extern int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; +extern long pv_entry_frees, pv_entry_allocs; +extern int pv_entry_spare; #endif /* @@ -2382,7 +2295,7 @@ TAILQ_REMOVE(&pv_chunks, pc, pc_lru); if (pmap != pc->pc_pmap) { if (pmap != NULL) { - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } @@ -2410,7 +2323,7 @@ pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) continue; - pte = pmap_pte(pmap, va); + pte = __CONCAT(PMTYPE, pte)(pmap, va); tpte = *pte; if ((tpte & PG_W) == 0) tpte = pte_load_clear(pte); @@ -2421,7 +2334,7 @@ ("pmap_pv_reclaim: pmap %p va %x zero pte", pmap, va)); if ((tpte & PG_G) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); @@ -2479,7 +2392,7 @@ out: TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); if (pmap != NULL) { - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } @@ -2880,7 +2793,8 @@ mtx_lock(&PMAP2mutex); if ((*PMAP2 & PG_FRAME) != mptepa) { *PMAP2 = mptepa | PG_RW | PG_V | PG_A 
| PG_M; - pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); + pmap_invalidate_page_int(kernel_pmap, + (vm_offset_t)PADDR2); } firstpte = PADDR2; } @@ -2930,7 +2844,7 @@ /* * Invalidate the recursive mapping of the page table page. */ - pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); + pmap_invalidate_page_int(pmap, (vm_offset_t)vtopte(va)); /* * Demote the pv entry. This depends on the earlier demotion @@ -2984,7 +2898,7 @@ /* * Invalidate the recursive mapping of the page table page. */ - pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); + pmap_invalidate_page_int(pmap, (vm_offset_t)vtopte(va)); } /* @@ -3065,7 +2979,7 @@ * PG_G. */ if (oldpte & PG_G) - pmap_invalidate_page(kernel_pmap, va); + pmap_invalidate_page_int(kernel_pmap, va); pmap->pm_stats.resident_count -= 1; if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); @@ -3092,7 +3006,7 @@ if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) return; pmap_remove_pte(pmap, pte, va, free); - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); } /* @@ -3133,8 +3047,8 @@ * It is assumed that the start and end are properly * rounded to the page size. */ -void -pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +__CONCAT(PMTYPE, remove)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t ptpaddr; @@ -3226,7 +3140,7 @@ out: sched_unpin(); if (anyvalid) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, true); @@ -3245,8 +3159,8 @@ * pmap_remove (slow...) */ -void -pmap_remove_all(vm_page_t m) +static void +__CONCAT(PMTYPE, remove_all)(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; @@ -3295,7 +3209,7 @@ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, &free); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); @@ -3332,7 +3246,7 @@ } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif @@ -3356,8 +3270,9 @@ * Set the physical protection on the * specified range of this map as requested. 
*/ -void -pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +static void +__CONCAT(PMTYPE, protect)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + vm_prot_t prot) { vm_offset_t pdnxt; pd_entry_t ptpaddr; @@ -3370,9 +3285,9 @@ return; } -#if defined(PAE) || defined(PAE_TABLES) - if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == - (VM_PROT_WRITE|VM_PROT_EXECUTE)) +#ifdef PMAP_PAE_COMP + if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == + (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; #else if (prot & VM_PROT_WRITE) @@ -3430,7 +3345,7 @@ pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) - pmap_invalidate_all( + pmap_invalidate_all_int( pmap); PMAP_UNLOCK(pmap); goto resume; @@ -3473,13 +3388,13 @@ } pbits &= ~(PG_RW | PG_M); } -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; #endif if (pbits != obits) { -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if (!atomic_cmpset_64(pte, obits, pbits)) goto retry; #else @@ -3488,14 +3403,14 @@ goto retry; #endif if (obits & PG_G) - pmap_invalidate_page(pmap, sva); + pmap_invalidate_page_int(pmap, sva); else anychanged = TRUE; } } } if (anychanged) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); @@ -3654,9 +3569,9 @@ * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ -int -pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - u_int flags, int8_t psind) +static int +__CONCAT(PMTYPE, enter)(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, u_int flags, int8_t psind) { pd_entry_t *pde; pt_entry_t *pte; @@ -3688,7 +3603,7 @@ newpte |= PG_RW; KASSERT((newpte & (PG_M | PG_RW)) != PG_M, ("pmap_enter: flags includes VM_PROT_WRITE but prot doesn't")); -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif @@ -3832,7 +3747,7 @@ vm_page_aflag_clear(om, PGA_WRITEABLE); } if ((origpte & PG_A) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); origpte = 0; } else { /* @@ -3875,7 +3790,7 @@ * the PTE no longer has PG_M set. */ } -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) { /* * This PTE change does not require TLB invalidation. @@ -3884,7 +3799,7 @@ } #endif if ((origpte & PG_A) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); } else pte_store(pte, newpte); @@ -3925,7 +3840,7 @@ PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) newpde |= PG_MANAGED; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; #endif @@ -3978,7 +3893,7 @@ pmap_invalidate_pde_page(pmap, va, oldpde); } else { if (pmap_remove_ptes(pmap, va, va + NBPDR, &free)) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); } vm_page_free_pages_toq(&free, true); if (pmap == kernel_pmap) { @@ -4041,8 +3956,8 @@ * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ -void -pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, +static void +__CONCAT(PMTYPE, enter_object)(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { vm_offset_t va; @@ -4080,8 +3995,9 @@ * but is *MUCH* faster than pmap_enter... 
*/ -void -pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +static void +__CONCAT(PMTYPE, enter_quick)(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot) { rw_wlock(&pvh_global_lock); @@ -4163,7 +4079,7 @@ if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, mpte, &free)) { - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, va); vm_page_free_pages_toq(&free, true); } @@ -4182,7 +4098,7 @@ pmap_cache_bits(pmap, m->md.pat_mode, 0); if ((m->oflags & VPO_UNMANAGED) == 0) newpte |= PG_MANAGED; -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; #endif @@ -4197,8 +4113,8 @@ * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ -void * -pmap_kenter_temporary(vm_paddr_t pa, int i) +static void * +__CONCAT(PMTYPE, kenter_temporary)(vm_paddr_t pa, int i) { vm_offset_t va; @@ -4213,9 +4129,9 @@ * processor address space. Note that some shortcuts * are taken, but the code works. */ -void -pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, - vm_pindex_t pindex, vm_size_t size) +static void +__CONCAT(PMTYPE, object_init_pt)(pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; vm_paddr_t pa, ptepa; @@ -4290,8 +4206,8 @@ * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ -void -pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +static void +__CONCAT(PMTYPE, unwire)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t pdnxt; pd_entry_t *pde; @@ -4388,9 +4304,9 @@ * world. */ -void -pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, - vm_offset_t src_addr) +static void +__CONCAT(PMTYPE, copy)(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, + vm_size_t len, vm_offset_t src_addr) { struct spglist free; pt_entry_t *src_pte, *dst_pte, ptetemp; @@ -4476,8 +4392,8 @@ SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, dstmpte, &free)) { - pmap_invalidate_page(dst_pmap, - addr); + pmap_invalidate_page_int( + dst_pmap, addr); vm_page_free_pages_toq(&free, true); } @@ -4517,8 +4433,8 @@ /* * Zero the specified hardware page. */ -void -pmap_zero_page(vm_page_t m) +static void +__CONCAT(PMTYPE, zero_page)(vm_page_t m) { pt_entry_t *cmap_pte2; struct pcpu *pc; @@ -4548,8 +4464,8 @@ * Zero an an area within a single hardware page. off and size must not * cover an area beyond a single hardware page. */ -void -pmap_zero_page_area(vm_page_t m, int off, int size) +static void +__CONCAT(PMTYPE, zero_page_area)(vm_page_t m, int off, int size) { pt_entry_t *cmap_pte2; struct pcpu *pc; @@ -4575,8 +4491,8 @@ /* * Copy 1 specified hardware page to another. */ -void -pmap_copy_page(vm_page_t src, vm_page_t dst) +static void +__CONCAT(PMTYPE, copy_page)(vm_page_t src, vm_page_t dst) { pt_entry_t *cmap_pte1, *cmap_pte2; struct pcpu *pc; @@ -4603,11 +4519,9 @@ mtx_unlock(&pc->pc_cmap_lock); } -int unmapped_buf_allowed = 1; - -void -pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], - vm_offset_t b_offset, int xfersize) +static void +__CONCAT(PMTYPE, copy_pages)(vm_page_t ma[], vm_offset_t a_offset, + vm_page_t mb[], vm_offset_t b_offset, int xfersize) { vm_page_t a_pg, b_pg; char *a_cp, *b_cp; @@ -4658,8 +4572,8 @@ * is only necessary that true be returned for a small * subset of pmaps for proper page aging. 
*/ -boolean_t -pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, page_exists_quick)(pmap_t pmap, vm_page_t m) { struct md_page *pvh; pv_entry_t pv; @@ -4701,8 +4615,8 @@ * Return the number of managed mappings to the given physical page * that are wired. */ -int -pmap_page_wired_mappings(vm_page_t m) +static int +__CONCAT(PMTYPE, page_wired_mappings)(vm_page_t m) { int count; @@ -4749,8 +4663,8 @@ * Returns TRUE if the given page is mapped individually or as part of * a 4mpage. Otherwise, returns FALSE. */ -boolean_t -pmap_page_is_mapped(vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, page_is_mapped)(vm_page_t m) { boolean_t rv; @@ -4772,8 +4686,8 @@ * mode enabled. This is much faster than pmap_remove * in the case of running down an entire address space. */ -void -pmap_remove_pages(pmap_t pmap) +static void +__CONCAT(PMTYPE, remove_pages)(pmap_t pmap) { pt_entry_t *pte, tpte; vm_page_t m, mpte, mt; @@ -4894,7 +4808,7 @@ } } sched_unpin(); - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, true); @@ -4906,8 +4820,8 @@ * Return whether or not the specified physical page was modified * in any physical maps. */ -boolean_t -pmap_is_modified(vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, is_modified)(vm_page_t m) { boolean_t rv; @@ -4965,8 +4879,8 @@ * Return whether or not the specified virtual address is elgible * for prefault. */ -boolean_t -pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +static boolean_t +__CONCAT(PMTYPE, is_prefaultable)(pmap_t pmap, vm_offset_t addr) { pd_entry_t pde; boolean_t rv; @@ -4986,8 +4900,8 @@ * Return whether or not the specified physical page was referenced * in any physical maps. */ -boolean_t -pmap_is_referenced(vm_page_t m) +static boolean_t +__CONCAT(PMTYPE, is_referenced)(vm_page_t m) { boolean_t rv; @@ -5032,8 +4946,8 @@ /* * Clear the write and modified bits in each of the given page's mappings. */ -void -pmap_remove_write(vm_page_t m) +static void +__CONCAT(PMTYPE, remove_write)(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; @@ -5088,7 +5002,7 @@ goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } @@ -5113,8 +5027,8 @@ * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ -int -pmap_ts_referenced(vm_page_t m) +static int +__CONCAT(PMTYPE, ts_referenced)(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; @@ -5169,7 +5083,7 @@ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (*pde & PG_W) == 0) { atomic_clear_int((u_int *)pde, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); } rtval++; } @@ -5198,7 +5112,7 @@ vm_page_dirty(m); if ((*pte & PG_A) != 0) { atomic_clear_int((u_int *)pte, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); rtval++; } PMAP_UNLOCK(pmap); @@ -5220,8 +5134,9 @@ * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. 
*/ -void -pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) +static void +__CONCAT(PMTYPE, advise)(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + int advice) { pd_entry_t oldpde, *pde; pt_entry_t *pte; @@ -5256,7 +5171,7 @@ pv_lists_locked = TRUE; if (!rw_try_wlock(&pvh_global_lock)) { if (anychanged) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); PMAP_UNLOCK(pmap); goto resume; } @@ -5314,15 +5229,15 @@ continue; maybe_invlrng: if (va != pdnxt) { - pmap_invalidate_range(pmap, va, sva); + pmap_invalidate_range_int(pmap, va, sva); va = pdnxt; } } if (va != pdnxt) - pmap_invalidate_range(pmap, va, sva); + pmap_invalidate_range_int(pmap, va, sva); } if (anychanged) - pmap_invalidate_all(pmap); + pmap_invalidate_all_int(pmap); if (pv_lists_locked) { sched_unpin(); rw_wunlock(&pvh_global_lock); @@ -5333,8 +5248,8 @@ /* * Clear the modify bits on the specified physical page. */ -void -pmap_clear_modify(vm_page_t m) +static void +__CONCAT(PMTYPE, clear_modify)(vm_page_t m) { struct md_page *pvh; pv_entry_t next_pv, pv; @@ -5390,7 +5305,8 @@ oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_int(pmap, + va); } } } @@ -5412,7 +5328,7 @@ * 32 bits. */ atomic_clear_int((u_int *)pte, PG_M); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_int(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } @@ -5464,8 +5380,8 @@ * routine is intended to be used for mapping device memory, * NOT real memory. */ -void * -pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) +static void * +__CONCAT(PMTYPE, mapdev_attr)(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; @@ -5510,28 +5426,14 @@ } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); - pmap_invalidate_range(kernel_pmap, va, va + tmpsize); + pmap_invalidate_range_int(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + size); return ((void *)(va + offset)); } -void * -pmap_mapdev(vm_paddr_t pa, vm_size_t size) +static void +__CONCAT(PMTYPE, unmapdev)(vm_offset_t va, vm_size_t size) { - - return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); -} - -void * -pmap_mapbios(vm_paddr_t pa, vm_size_t size) -{ - - return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); -} - -void -pmap_unmapdev(vm_offset_t va, vm_size_t size) -{ struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; @@ -5562,8 +5464,8 @@ /* * Sets the memory attribute for the specified page. */ -void -pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) +static void +__CONCAT(PMTYPE, page_set_memattr)(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; @@ -5592,7 +5494,7 @@ } static void -pmap_flush_page(vm_page_t m) +__CONCAT(PMTYPE, flush_page)(vm_page_t m) { pt_entry_t *cmap_pte2; struct pcpu *pc; @@ -5651,8 +5553,8 @@ * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. */ -int -pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +static int +__CONCAT(PMTYPE, change_attr)(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; pd_entry_t *pde; @@ -5749,7 +5651,7 @@ * shouldn't be, etc. 
*/ if (changed) { - pmap_invalidate_range(kernel_pmap, base, tmpva); + pmap_invalidate_range_int(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva); } return (0); @@ -5758,8 +5660,8 @@ /* * perform the pmap work for mincore */ -int -pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) +static int +__CONCAT(PMTYPE, mincore)(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t pde; pt_entry_t pte; @@ -5805,8 +5707,8 @@ return (val); } -void -pmap_activate(struct thread *td) +static void +__CONCAT(PMTYPE, activate)(struct thread *td) { pmap_t pmap, oldpmap; u_int cpuid; @@ -5823,7 +5725,7 @@ CPU_CLR(cpuid, &oldpmap->pm_active); CPU_SET(cpuid, &pmap->pm_active); #endif -#if defined(PAE) || defined(PAE_TABLES) +#ifdef PMAP_PAE_COMP cr3 = vtophys(pmap->pm_pdpt); #else cr3 = vtophys(pmap->pm_pdir); @@ -5836,8 +5738,8 @@ critical_exit(); } -void -pmap_activate_boot(pmap_t pmap) +static void +__CONCAT(PMTYPE, activate_boot)(pmap_t pmap) { u_int cpuid; @@ -5850,17 +5752,12 @@ PCPU_SET(curpmap, pmap); } -void -pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) -{ -} - /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ -void -pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, +static void +__CONCAT(PMTYPE, align_superpage)(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; @@ -5879,8 +5776,8 @@ *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } -vm_offset_t -pmap_quick_enter_page(vm_page_t m) +static vm_offset_t +__CONCAT(PMTYPE, quick_enter_page)(vm_page_t m) { vm_offset_t qaddr; pt_entry_t *pte; @@ -5889,7 +5786,8 @@ qaddr = PCPU_GET(qmap_addr); pte = vtopte(qaddr); - KASSERT(*pte == 0, ("pmap_quick_enter_page: PTE busy")); + KASSERT(*pte == 0, + ("pmap_quick_enter_page: PTE busy %#jx", (uintmax_t)*pte)); *pte = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(m), 0); invlpg(qaddr); @@ -5897,8 +5795,8 @@ return (qaddr); } -void -pmap_quick_remove_page(vm_offset_t addr) +static void +__CONCAT(PMTYPE, quick_remove_page)(vm_offset_t addr) { vm_offset_t qaddr; pt_entry_t *pte; @@ -5948,8 +5846,8 @@ return (0); } -static -void pmap_init_trm(void) +void +pmap_init_trm(void) { vm_page_t pd_m; @@ -5966,8 +5864,8 @@ pmap_cache_bits(kernel_pmap, VM_MEMATTR_DEFAULT, TRUE); } -void * -pmap_trm_alloc(size_t size, int flags) +static void * +__CONCAT(PMTYPE, trm_alloc)(size_t size, int flags) { vmem_addr_t res; int error; @@ -5982,8 +5880,8 @@ return ((void *)res); } -void -pmap_trm_free(void *addr, size_t size) +static void +__CONCAT(PMTYPE, trm_free)(void *addr, size_t size) { vmem_free(pmap_trm_arena, (uintptr_t)addr, roundup2(size, 4)); @@ -6049,3 +5947,254 @@ return (npte); } #endif + +static void +__CONCAT(PMTYPE, ksetrw)(vm_offset_t va) +{ + + *vtopte(va) |= PG_RW; +} + +static void +__CONCAT(PMTYPE, remap_lowptdi)(bool enable) +{ + + PTD[KPTDI] = enable ? 
PTD[LOWPTDI] : 0; + invltlb_glob(); +} + +static vm_offset_t +__CONCAT(PMTYPE, get_map_low)(void) +{ + + return (PMAP_MAP_LOW); +} + +static vm_offset_t +__CONCAT(PMTYPE, get_vm_maxuser_address)(void) +{ + + return (VM_MAXUSER_ADDRESS); +} + +static vm_paddr_t +__CONCAT(PMTYPE, pg_frame)(vm_paddr_t pa) +{ + + return (pa & PG_FRAME); +} + +static void +__CONCAT(PMTYPE, sf_buf_map)(struct sf_buf *sf) +{ + pt_entry_t opte, *ptep; + + /* + * Update the sf_buf's virtual-to-physical mapping, flushing the + * virtual address from the TLB. Since the reference count for + * the sf_buf's old mapping was zero, that mapping is not + * currently in use. Consequently, there is no need to exchange + * the old and new PTEs atomically, even under PAE. + */ + ptep = vtopte(sf->kva); + opte = *ptep; + *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V | + pmap_cache_bits(kernel_pmap, sf->m->md.pat_mode, 0); + + /* + * Avoid unnecessary TLB invalidations: If the sf_buf's old + * virtual-to-physical mapping was not used, then any processor + * that has invalidated the sf_buf's virtual address from its TLB + * since the last used mapping need not invalidate again. + */ +#ifdef SMP + if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) + CPU_ZERO(&sf->cpumask); +#else + if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) + pmap_invalidate_page_int(kernel_pmap, sf->kva); +#endif +} + +static void +__CONCAT(PMTYPE, cp_slow0_map)(vm_offset_t kaddr, int plen, vm_page_t *ma) +{ + pt_entry_t *pte; + int i; + + for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) { + *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(ma[i]) | + pmap_cache_bits(kernel_pmap, pmap_page_get_memattr(ma[i]), + FALSE); + invlpg(kaddr + ptoa(i)); + } +} + +static u_int +__CONCAT(PMTYPE, get_kcr3)(void) +{ + +#ifdef PMAP_PAE_COMP + return ((u_int)IdlePDPT); +#else + return ((u_int)IdlePTD); +#endif +} + +static u_int +__CONCAT(PMTYPE, get_cr3)(pmap_t pmap) +{ + +#ifdef PMAP_PAE_COMP + return ((u_int)vtophys(pmap->pm_pdpt)); +#else + return ((u_int)vtophys(pmap->pm_pdir)); +#endif +} + +static caddr_t +__CONCAT(PMTYPE, cmap3)(vm_paddr_t pa, u_int pte_bits) +{ + pt_entry_t *pte; + + pte = CMAP3; + *pte = pa | pte_bits; + invltlb(); + return (CADDR3); +} + +static void +__CONCAT(PMTYPE, basemem_setup)(u_int basemem) +{ + pt_entry_t *pte; + int i; + + /* + * Map pages between basemem and ISA_HOLE_START, if any, r/w into + * the vm86 page table so that vm86 can scribble on them using + * the vm86 map too. XXX: why 2 ways for this and only 1 way for + * page 0, at least as initialized here? + */ + pte = (pt_entry_t *)vm86paddr; + for (i = basemem / 4; i < 160; i++) + pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; +} + +struct bios16_pmap_handle { + pt_entry_t *pte; + pd_entry_t *ptd; + pt_entry_t orig_ptd; +}; + +static void * +__CONCAT(PMTYPE, bios16_enter)(void) +{ + struct bios16_pmap_handle *h; + + /* + * no page table, so create one and install it. 
+ */ + h = malloc(sizeof(struct bios16_pmap_handle), M_TEMP, M_WAITOK); + h->pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); + h->ptd = IdlePTD; + *h->pte = vm86phystk | PG_RW | PG_V; + h->orig_ptd = *h->ptd; + *h->ptd = vtophys(h->pte) | PG_RW | PG_V; + pmap_invalidate_all_int(kernel_pmap); /* XXX insurance for now */ + return (h); +} + +static void +__CONCAT(PMTYPE, bios16_leave)(void *arg) +{ + struct bios16_pmap_handle *h; + + h = arg; + *h->ptd = h->orig_ptd; /* remove page table */ + /* + * XXX only needs to be invlpg(0) but that doesn't work on the 386 + */ + pmap_invalidate_all_int(kernel_pmap); + free(h->pte, M_TEMP); /* ... and free it */ +} + +#define PMM(a) \ + .pm_##a = __CONCAT(PMTYPE, a), + +struct pmap_methods __CONCAT(PMTYPE, methods) = { + PMM(ksetrw) + PMM(remap_lower) + PMM(remap_lowptdi) + PMM(align_superpage) + PMM(quick_enter_page) + PMM(quick_remove_page) + PMM(trm_alloc) + PMM(trm_free) + PMM(get_map_low) + PMM(get_vm_maxuser_address) + PMM(kextract) + PMM(pg_frame) + PMM(sf_buf_map) + PMM(cp_slow0_map) + PMM(get_kcr3) + PMM(get_cr3) + PMM(cmap3) + PMM(basemem_setup) + PMM(set_nx) + PMM(bios16_enter) + PMM(bios16_leave) + PMM(bootstrap) + PMM(is_valid_memattr) + PMM(cache_bits) + PMM(ps_enabled) + PMM(pinit0) + PMM(pinit) + PMM(activate) + PMM(activate_boot) + PMM(advise) + PMM(clear_modify) + PMM(change_attr) + PMM(mincore) + PMM(copy) + PMM(copy_page) + PMM(copy_pages) + PMM(zero_page) + PMM(zero_page_area) + PMM(enter) + PMM(enter_object) + PMM(enter_quick) + PMM(kenter_temporary) + PMM(object_init_pt) + PMM(unwire) + PMM(page_exists_quick) + PMM(page_wired_mappings) + PMM(page_is_mapped) + PMM(remove_pages) + PMM(is_modified) + PMM(is_prefaultable) + PMM(is_referenced) + PMM(remove_write) + PMM(ts_referenced) + PMM(mapdev_attr) + PMM(unmapdev) + PMM(page_set_memattr) + PMM(extract) + PMM(extract_and_hold) + PMM(map) + PMM(qenter) + PMM(qremove) + PMM(release) + PMM(remove) + PMM(protect) + PMM(remove_all) + PMM(init) + PMM(init_pat) + PMM(growkernel) + PMM(invalidate_page) + PMM(invalidate_range) + PMM(invalidate_all) + PMM(invalidate_cache) + PMM(flush_page) + PMM(kenter) + PMM(kremove) +}; Index: head/sys/i386/i386/pmap_base.c =================================================================== --- head/sys/i386/i386/pmap_base.c +++ head/sys/i386/i386/pmap_base.c @@ -0,0 +1,954 @@ +/*- + * SPDX-License-Identifier: BSD-4-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. + * Copyright (c) 2005-2010 Alan L. Cox + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. + * + * Portions of this software were developed by + * Konstantin Belousov under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_smp.h" +#include "opt_vm.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef DEV_APIC +#include +#include +#include +#endif +#include + +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +#include +#include +#include +#include +#include + +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ + +int unmapped_buf_allowed = 1; + +int pti; + +u_long physfree; /* phys addr of next free page */ +u_long vm86phystk; /* PA of vm86/bios stack */ +u_long vm86paddr; /* address of vm86 region */ +int vm86pa; /* phys addr of vm86 region */ +u_long KERNend; /* phys addr end of kernel (just after bss) */ +u_long KPTphys; /* phys addr of kernel page tables */ +caddr_t ptvmmap = 0; +vm_offset_t kernel_vm_end; + +int i386_pmap_VM_NFREEORDER; +int i386_pmap_VM_LEVEL_0_ORDER; +int i386_pmap_PDRSHIFT; + +int pat_works = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, + &pat_works, 1, + "Is page attribute table fully functional?"); + +int pg_ps_enabled = 1; +SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + &pg_ps_enabled, 0, + "Are large page mappings enabled?"); + +int pv_entry_max = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, + &pv_entry_max, 0, + "Max number of PV entries"); + +int pv_entry_count = 0; +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, + &pv_entry_count, 0, + "Current number of pv entries"); + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +int shpgperproc = PMAP_SHPGPERPROC; +SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, + &shpgperproc, 0, + "Page share factor per proc"); + +static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, + "2/4MB page mapping counters"); + +u_long pmap_pde_demotions; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, + &pmap_pde_demotions, 0, + "2/4MB page demotions"); + +u_long pmap_pde_mappings; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, + &pmap_pde_mappings, 0, + "2/4MB page mappings"); + +u_long pmap_pde_p_failures; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, + &pmap_pde_p_failures, 0, + "2/4MB page promotion failures"); + +u_long pmap_pde_promotions; +SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, + &pmap_pde_promotions, 0, + "2/4MB page promotions"); + +#ifdef SMP +int PMAP1changedcpu; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, + &PMAP1changedcpu, 0, + "Number of times pmap_pte_quick changed CPU with same PMAP1"); +#endif + +int PMAP1changed; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, + &PMAP1changed, 0, + "Number of times pmap_pte_quick changed PMAP1"); +int PMAP1unchanged; +SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, + &PMAP1unchanged, 0, + "Number of times pmap_pte_quick didn't change PMAP1"); + +static int +kvm_size(SYSCTL_HANDLER_ARGS) +{ + unsigned long ksize; + + ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; + return (sysctl_handle_long(oidp, &ksize, 0, req)); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, kvm_size, "IU", + "Size of KVM"); + +static int +kvm_free(SYSCTL_HANDLER_ARGS) +{ + unsigned long kfree; + + kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + return (sysctl_handle_long(oidp, &kfree, 0, req)); +} 
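kvm_size() and kvm_free() compute their values at read time, so they are exported through sysctl_handle_long() handlers rather than by pointing SYSCTL_LONG at a static variable; the vm.kvm_size OID above and the vm.kvm_free OID registered just below both use this pattern. A minimal userland sketch of reading the two OIDs, assuming a FreeBSD host where both are present:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	unsigned long ksize, kfree;
	size_t len;

	/* Each read invokes the kernel handler, which recomputes the value. */
	len = sizeof(ksize);
	if (sysctlbyname("vm.kvm_size", &ksize, &len, NULL, 0) != 0) {
		perror("vm.kvm_size");
		return (1);
	}
	len = sizeof(kfree);
	if (sysctlbyname("vm.kvm_free", &kfree, &len, NULL, 0) != 0) {
		perror("vm.kvm_free");
		return (1);
	}
	printf("KVM size %lu bytes, KVM free %lu bytes\n", ksize, kfree);
	return (0);
}

Marking the handlers CTLFLAG_MPSAFE is safe because each one only subtracts two globals, so no Giant protection is needed around the read.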
+SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, kvm_free, "IU", + "Amount of KVM free"); + +#ifdef PV_STATS +int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; +long pv_entry_frees, pv_entry_allocs; +int pv_entry_spare; + +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, + &pc_chunk_count, 0, + "Current number of pv entry chunks"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, + &pc_chunk_allocs, 0, + "Current number of pv entry chunks allocated"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, + &pc_chunk_frees, 0, + "Current number of pv entry chunks frees"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, + &pc_chunk_tryfail, 0, + "Number of times tried to get a chunk page but failed."); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, + &pv_entry_frees, 0, + "Current number of pv entry frees"); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, + &pv_entry_allocs, 0, + "Current number of pv entry allocs"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, + &pv_entry_spare, 0, + "Current number of spare pv entries"); +#endif + +struct pmap kernel_pmap_store; +static struct pmap_methods *pmap_methods_ptr; + +/* + * Initialize a vm_page's machine-dependent fields. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); + m->md.pat_mode = PAT_WRITE_BACK; +} + +void +invltlb_glob(void) +{ + + invltlb(); +} + +static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, + vm_offset_t eva); +static void pmap_invalidate_cache_range_all(vm_offset_t sva, + vm_offset_t eva); + +void +pmap_flush_page(vm_page_t m) +{ + + pmap_methods_ptr->pm_flush_page(m); +} + +DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t, vm_offset_t), + static) +{ + + if ((cpu_feature & CPUID_SS) != 0) + return (pmap_invalidate_cache_range_selfsnoop); + if ((cpu_feature & CPUID_CLFSH) != 0) + return (pmap_force_invalidate_cache_range); + return (pmap_invalidate_cache_range_all); +} + +#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) + +static void +pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva) +{ + + KASSERT((sva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: sva not page-aligned")); + KASSERT((eva & PAGE_MASK) == 0, + ("pmap_invalidate_cache_range: eva not page-aligned")); +} + +static void +pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) +{ + + pmap_invalidate_cache_range_check_align(sva, eva); +} + +void +pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) +{ + + sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); + if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { + /* + * The supplied range is bigger than 2MB. + * Globally invalidate cache. + */ + pmap_invalidate_cache(); + return; + } + +#ifdef DEV_APIC + /* + * XXX: Some CPUs fault, hang, or trash the local APIC + * registers if we use CLFLUSH on the local APIC + * range. The local APIC is always uncached, so we + * don't need to flush for that range anyway. + */ + if (pmap_kextract(sva) == lapic_paddr) + return; +#endif + + if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { + /* + * Do per-cache line flush. Use the sfence + * instruction to insure that previous stores are + * included in the write-back. The processor + * propagates flush to other processors in the cache + * coherence domain. 
+ */ + sfence(); + for (; sva < eva; sva += cpu_clflush_line_size) + clflushopt(sva); + sfence(); + } else { + /* + * Writes are ordered by CLFLUSH on Intel CPUs. + */ + if (cpu_vendor_id != CPU_VENDOR_INTEL) + mfence(); + for (; sva < eva; sva += cpu_clflush_line_size) + clflush(sva); + if (cpu_vendor_id != CPU_VENDOR_INTEL) + mfence(); + } +} + +static void +pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) +{ + + pmap_invalidate_cache_range_check_align(sva, eva); + pmap_invalidate_cache(); +} + +void +pmap_invalidate_cache_pages(vm_page_t *pages, int count) +{ + int i; + + if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || + (cpu_feature & CPUID_CLFSH) == 0) { + pmap_invalidate_cache(); + } else { + for (i = 0; i < count; i++) + pmap_flush_page(pages[i]); + } +} + +void +pmap_ksetrw(vm_offset_t va) +{ + + pmap_methods_ptr->pm_ksetrw(va); +} + +void +pmap_remap_lower(bool enable) +{ + + pmap_methods_ptr->pm_remap_lower(enable); +} + +void +pmap_remap_lowptdi(bool enable) +{ + + pmap_methods_ptr->pm_remap_lowptdi(enable); +} + +void +pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size) +{ + + return (pmap_methods_ptr->pm_align_superpage(object, offset, + addr, size)); +} + +vm_offset_t +pmap_quick_enter_page(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_quick_enter_page(m)); +} + +void +pmap_quick_remove_page(vm_offset_t addr) +{ + + return (pmap_methods_ptr->pm_quick_remove_page(addr)); +} + +void * +pmap_trm_alloc(size_t size, int flags) +{ + + return (pmap_methods_ptr->pm_trm_alloc(size, flags)); +} + +void +pmap_trm_free(void *addr, size_t size) +{ + + pmap_methods_ptr->pm_trm_free(addr, size); +} + +void +pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) +{ +} + +vm_offset_t +pmap_get_map_low(void) +{ + + return (pmap_methods_ptr->pm_get_map_low()); +} + +vm_offset_t +pmap_get_vm_maxuser_address(void) +{ + + return (pmap_methods_ptr->pm_get_vm_maxuser_address()); +} + +vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + + return (pmap_methods_ptr->pm_kextract(va)); +} + +vm_paddr_t +pmap_pg_frame(vm_paddr_t pa) +{ + + return (pmap_methods_ptr->pm_pg_frame(pa)); +} + +void +pmap_sf_buf_map(struct sf_buf *sf) +{ + + pmap_methods_ptr->pm_sf_buf_map(sf); +} + +void +pmap_cp_slow0_map(vm_offset_t kaddr, int plen, vm_page_t *ma) +{ + + pmap_methods_ptr->pm_cp_slow0_map(kaddr, plen, ma); +} + +u_int +pmap_get_kcr3(void) +{ + + return (pmap_methods_ptr->pm_get_kcr3()); +} + +u_int +pmap_get_cr3(pmap_t pmap) +{ + + return (pmap_methods_ptr->pm_get_cr3(pmap)); +} + +caddr_t +pmap_cmap3(vm_paddr_t pa, u_int pte_flags) +{ + + return (pmap_methods_ptr->pm_cmap3(pa, pte_flags)); +} + +void +pmap_basemem_setup(u_int basemem) +{ + + pmap_methods_ptr->pm_basemem_setup(basemem); +} + +void +pmap_set_nx(void) +{ + + pmap_methods_ptr->pm_set_nx(); +} + +void * +pmap_bios16_enter(void) +{ + + return (pmap_methods_ptr->pm_bios16_enter()); +} + +void +pmap_bios16_leave(void *handle) +{ + + pmap_methods_ptr->pm_bios16_leave(handle); +} + +void +pmap_bootstrap(vm_paddr_t firstaddr) +{ + + pmap_methods_ptr->pm_bootstrap(firstaddr); +} + +boolean_t +pmap_is_valid_memattr(pmap_t pmap, vm_memattr_t mode) +{ + + return (pmap_methods_ptr->pm_is_valid_memattr(pmap, mode)); +} + +int +pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) +{ + + return (pmap_methods_ptr->pm_cache_bits(pmap, mode, is_pde)); +} + +bool +pmap_ps_enabled(pmap_t pmap) +{ + + return (pmap_methods_ptr->pm_ps_enabled(pmap)); +} + +void +pmap_pinit0(pmap_t pmap) +{ + + 
pmap_methods_ptr->pm_pinit0(pmap); +} + +int +pmap_pinit(pmap_t pmap) +{ + + return (pmap_methods_ptr->pm_pinit(pmap)); +} + +void +pmap_activate(struct thread *td) +{ + + pmap_methods_ptr->pm_activate(td); +} + +void +pmap_activate_boot(pmap_t pmap) +{ + + pmap_methods_ptr->pm_activate_boot(pmap); +} + +void +pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) +{ + + pmap_methods_ptr->pm_advise(pmap, sva, eva, advice); +} + +void +pmap_clear_modify(vm_page_t m) +{ + + pmap_methods_ptr->pm_clear_modify(m); +} + +int +pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) +{ + + return (pmap_methods_ptr->pm_change_attr(va, size, mode)); +} + +int +pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) +{ + + return (pmap_methods_ptr->pm_mincore(pmap, addr, locked_pa)); +} + +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, + vm_offset_t src_addr) +{ + + pmap_methods_ptr->pm_copy(dst_pmap, src_pmap, dst_addr, len, src_addr); +} + +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + + pmap_methods_ptr->pm_copy_page(src, dst); +} + +void +pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], + vm_offset_t b_offset, int xfersize) +{ + + pmap_methods_ptr->pm_copy_pages(ma, a_offset, mb, b_offset, xfersize); +} + +void +pmap_zero_page(vm_page_t m) +{ + + pmap_methods_ptr->pm_zero_page(m); +} + +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + + pmap_methods_ptr->pm_zero_page_area(m, off, size); +} + +int +pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + u_int flags, int8_t psind) +{ + + return (pmap_methods_ptr->pm_enter(pmap, va, m, prot, flags, psind)); +} + +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + + pmap_methods_ptr->pm_enter_object(pmap, start, end, m_start, prot); +} + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + + pmap_methods_ptr->pm_enter_quick(pmap, va, m, prot); +} + +void * +pmap_kenter_temporary(vm_paddr_t pa, int i) +{ + + return (pmap_methods_ptr->pm_kenter_temporary(pa, i)); +} + +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, + vm_pindex_t pindex, vm_size_t size) +{ + + pmap_methods_ptr->pm_object_init_pt(pmap, addr, object, pindex, size); +} + +void +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + pmap_methods_ptr->pm_unwire(pmap, sva, eva); +} + +boolean_t +pmap_page_exists_quick(pmap_t pmap, vm_page_t m) +{ + + return (pmap_methods_ptr->pm_page_exists_quick(pmap, m)); +} + +int +pmap_page_wired_mappings(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_page_wired_mappings(m)); +} + +boolean_t +pmap_page_is_mapped(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_page_is_mapped(m)); +} + +void +pmap_remove_pages(pmap_t pmap) +{ + + pmap_methods_ptr->pm_remove_pages(pmap); +} + +boolean_t +pmap_is_modified(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_is_modified(m)); +} + +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + + return (pmap_methods_ptr->pm_is_prefaultable(pmap, addr)); +} + +boolean_t +pmap_is_referenced(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_is_referenced(m)); +} + +void +pmap_remove_write(vm_page_t m) +{ + + pmap_methods_ptr->pm_remove_write(m); +} + +int +pmap_ts_referenced(vm_page_t m) +{ + + return (pmap_methods_ptr->pm_ts_referenced(m)); +} + +void * +pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) +{ + + return 
(pmap_methods_ptr->pm_mapdev_attr(pa, size, mode)); +} + +void * +pmap_mapdev(vm_paddr_t pa, vm_size_t size) +{ + + return (pmap_methods_ptr->pm_mapdev_attr(pa, size, PAT_UNCACHEABLE)); +} + +void * +pmap_mapbios(vm_paddr_t pa, vm_size_t size) +{ + + return (pmap_methods_ptr->pm_mapdev_attr(pa, size, PAT_WRITE_BACK)); +} + +void +pmap_unmapdev(vm_offset_t va, vm_size_t size) +{ + + pmap_methods_ptr->pm_unmapdev(va, size); +} + +void +pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) +{ + + pmap_methods_ptr->pm_page_set_memattr(m, ma); +} + +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + + return (pmap_methods_ptr->pm_extract(pmap, va)); +} + +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + + return (pmap_methods_ptr->pm_extract_and_hold(pmap, va, prot)); +} + +vm_offset_t +pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + + return (pmap_methods_ptr->pm_map(virt, start, end, prot)); +} + +void +pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) +{ + + pmap_methods_ptr->pm_qenter(sva, ma, count); +} + +void +pmap_qremove(vm_offset_t sva, int count) +{ + + pmap_methods_ptr->pm_qremove(sva, count); +} + +void +pmap_release(pmap_t pmap) +{ + + pmap_methods_ptr->pm_release(pmap); +} + +void +pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + pmap_methods_ptr->pm_remove(pmap, sva, eva); +} + +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + + pmap_methods_ptr->pm_protect(pmap, sva, eva, prot); +} + +void +pmap_remove_all(vm_page_t m) +{ + + pmap_methods_ptr->pm_remove_all(m); +} + +void +pmap_init(void) +{ + + pmap_methods_ptr->pm_init(); +} + +void +pmap_init_pat(void) +{ + + pmap_methods_ptr->pm_init_pat(); +} + +void +pmap_growkernel(vm_offset_t addr) +{ + + pmap_methods_ptr->pm_growkernel(addr); +} + +void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + + pmap_methods_ptr->pm_invalidate_page(pmap, va); +} + +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + + pmap_methods_ptr->pm_invalidate_range(pmap, sva, eva); +} + +void +pmap_invalidate_all(pmap_t pmap) +{ + + pmap_methods_ptr->pm_invalidate_all(pmap); +} + +void +pmap_invalidate_cache(void) +{ + + pmap_methods_ptr->pm_invalidate_cache(); +} + +void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + + pmap_methods_ptr->pm_kenter(va, pa); +} + +void +pmap_kremove(vm_offset_t va) +{ + + pmap_methods_ptr->pm_kremove(va); +} + +extern struct pmap_methods pmap_pae_methods, pmap_nopae_methods; +int pae_mode; +SYSCTL_INT(_vm_pmap, OID_AUTO, pae_mode, CTLFLAG_RD, + &pae_mode, 1, + "PAE"); + +void +pmap_cold(void) +{ + + if ((cpu_feature & CPUID_PAE) != 0) { + pae_mode = 1; + pmap_methods_ptr = &pmap_pae_methods; + pmap_pae_cold(); + } else { + pmap_methods_ptr = &pmap_nopae_methods; + pmap_nopae_cold(); + } +} Index: head/sys/i386/i386/pmap_nopae.c =================================================================== --- head/sys/i386/i386/pmap_nopae.c +++ head/sys/i386/i386/pmap_nopae.c @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_smp.h" +#include "opt_vm.h" + +#include +#include +#include +#define PMTYPE pmap_nopae_ +#include +#include +_Static_assert(sizeof(struct pmap_KBI) >= sizeof(struct pmap), "pmap KBI"); +#include "pmap.c" Index: head/sys/i386/i386/pmap_pae.c =================================================================== --- head/sys/i386/i386/pmap_pae.c +++ head/sys/i386/i386/pmap_pae.c @@ -0,0 +1,49 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_smp.h" +#include "opt_vm.h" + +#define PMAP_PAE_COMP +#include +#include +#include +#define PMTYPE pmap_pae_ +#include +#include +_Static_assert(sizeof(struct pmap_KBI) >= sizeof(struct pmap), "pmap KBI"); +#include "pmap.c" Index: head/sys/i386/i386/trap.c =================================================================== --- head/sys/i386/i386/trap.c +++ head/sys/i386/i386/trap.c @@ -119,6 +119,7 @@ void dblfault_handler(void); extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); +extern uint64_t pg_nx; #define MAX_TRAP_MSG 32 @@ -871,10 +872,8 @@ */ if (frame->tf_err & PGEX_W) ftype = VM_PROT_WRITE; -#if defined(PAE) || defined(PAE_TABLES) else if ((frame->tf_err & PGEX_I) && pg_nx != 0) ftype = VM_PROT_EXECUTE; -#endif else ftype = VM_PROT_READ; @@ -935,10 +934,8 @@ printf("fault code = %s %s%s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", -#if defined(PAE) || defined(PAE_TABLES) pg_nx != 0 ? (code & PGEX_I ? " instruction" : " data") : -#endif "", code & PGEX_RSV ? "reserved bits in PTE" : code & PGEX_P ? "protection violation" : "page not present"); Index: head/sys/i386/i386/vm86.c =================================================================== --- head/sys/i386/i386/vm86.c +++ head/sys/i386/i386/vm86.c @@ -397,8 +397,8 @@ (sizeof(struct pcb_ext) - sizeof(struct segment_descriptor) + \ INTMAP_SIZE + IOMAP_SIZE + 1) -struct vm86_layout { - pt_entry_t vml_pgtbl[PGTABLE_SIZE]; +struct vm86_layout_pae { + uint64_t vml_pgtbl[PGTABLE_SIZE]; struct pcb vml_pcb; struct pcb_ext vml_ext; char vml_intmap[INTMAP_SIZE]; @@ -406,12 +406,26 @@ char vml_iomap_trailer; }; -void -vm86_initialize(void) +struct vm86_layout_nopae { + uint32_t vml_pgtbl[PGTABLE_SIZE]; + struct pcb vml_pcb; + struct pcb_ext vml_ext; + char vml_intmap[INTMAP_SIZE]; + char vml_iomap[IOMAP_SIZE]; + char vml_iomap_trailer; +}; + +_Static_assert(sizeof(struct vm86_layout_pae) <= ctob(3), + "struct vm86_layout_pae exceeds space allocated in locore.s"); +_Static_assert(sizeof(struct vm86_layout_nopae) <= ctob(3), + "struct vm86_layout_nopae exceeds space allocated in locore.s"); + +static void +vm86_initialize_pae(void) { int i; u_int *addr; - struct vm86_layout *vml = (struct vm86_layout *)vm86paddr; + struct vm86_layout_pae *vml; struct pcb *pcb; struct pcb_ext *ext; struct soft_segment_descriptor ssd = { @@ -426,12 +440,6 @@ }; /* - * this should be a compile time error, but cpp doesn't grok sizeof(). - */ - if (sizeof(struct vm86_layout) > ctob(3)) - panic("struct vm86_layout exceeds space allocated in locore.s"); - - /* * Below is the memory layout that we use for the vm86 region. 
* * +--------+ @@ -473,6 +481,7 @@ #define vm86_frame pcb_ebp #define pgtable_va pcb_ebx + vml = (struct vm86_layout_pae *)vm86paddr; pcb = &vml->vml_pcb; ext = &vml->vml_ext; @@ -482,13 +491,13 @@ pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U; pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame); pcb->pgtable_va = vm86paddr; - pcb->pcb_flags = PCB_VM86CALL; + pcb->pcb_flags = PCB_VM86CALL; pcb->pcb_ext = ext; - bzero(ext, sizeof(struct pcb_ext)); + bzero(ext, sizeof(struct pcb_ext)); ext->ext_tss.tss_esp0 = vm86paddr; ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); - ext->ext_tss.tss_ioopt = + ext->ext_tss.tss_ioopt = ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16; ext->ext_iomap = vml->vml_iomap; ext->ext_vm86.vm86_intmap = vml->vml_intmap; @@ -502,7 +511,7 @@ vml->vml_iomap_trailer = 0xff; ssd.ssd_base = (u_int)&ext->ext_tss; - ssd.ssd_limit = TSS_SIZE - 1; + ssd.ssd_limit = TSS_SIZE - 1; ssdtosd(&ssd, &ext->ext_tssd); vm86pcb = pcb; @@ -517,6 +526,80 @@ #endif } +static void +vm86_initialize_nopae(void) +{ + int i; + u_int *addr; + struct vm86_layout_nopae *vml; + struct pcb *pcb; + struct pcb_ext *ext; + struct soft_segment_descriptor ssd = { + 0, /* segment base address (overwritten) */ + 0, /* length (overwritten) */ + SDT_SYS386TSS, /* segment type */ + 0, /* priority level */ + 1, /* descriptor present */ + 0, 0, + 0, /* default 16 size */ + 0 /* granularity */ + }; + + vml = (struct vm86_layout_nopae *)vm86paddr; + pcb = &vml->vml_pcb; + ext = &vml->vml_ext; + + mtx_init(&vm86_lock, "vm86 lock", NULL, MTX_DEF); + + bzero(pcb, sizeof(struct pcb)); + pcb->new_ptd = vm86pa | PG_V | PG_RW | PG_U; + pcb->vm86_frame = vm86paddr - sizeof(struct vm86frame); + pcb->pgtable_va = vm86paddr; + pcb->pcb_flags = PCB_VM86CALL; + pcb->pcb_ext = ext; + + bzero(ext, sizeof(struct pcb_ext)); + ext->ext_tss.tss_esp0 = vm86paddr; + ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); + ext->ext_tss.tss_ioopt = + ((u_int)vml->vml_iomap - (u_int)&ext->ext_tss) << 16; + ext->ext_iomap = vml->vml_iomap; + ext->ext_vm86.vm86_intmap = vml->vml_intmap; + + if (cpu_feature & CPUID_VME) + ext->ext_vm86.vm86_has_vme = (rcr4() & CR4_VME ? 1 : 0); + + addr = (u_int *)ext->ext_vm86.vm86_intmap; + for (i = 0; i < (INTMAP_SIZE + IOMAP_SIZE) / sizeof(u_int); i++) + *addr++ = 0; + vml->vml_iomap_trailer = 0xff; + + ssd.ssd_base = (u_int)&ext->ext_tss; + ssd.ssd_limit = TSS_SIZE - 1; + ssdtosd(&ssd, &ext->ext_tssd); + + vm86pcb = pcb; + +#if 0 + /* + * use whatever is leftover of the vm86 page layout as a + * message buffer so we can capture early output. 
+ */ + msgbufinit((vm_offset_t)vm86paddr + sizeof(struct vm86_layout), + ctob(3) - sizeof(struct vm86_layout)); +#endif +} + +void +vm86_initialize(void) +{ + + if (pae_mode) + vm86_initialize_pae(); + else + vm86_initialize_nopae(); +} + vm_offset_t vm86_getpage(struct vm86context *vmc, int pagenum) { @@ -644,19 +727,31 @@ int vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc) { - pt_entry_t *pte; + uint64_t *pte_pae; + uint32_t *pte_nopae; int (*p)(struct vm86frame *); vm_paddr_t page; int i, entry, retval; - pte = (pt_entry_t *)vm86paddr; mtx_lock(&vm86_lock); - for (i = 0; i < vmc->npages; i++) { - page = vtophys(vmc->pmap[i].kva & PG_FRAME); - entry = vmc->pmap[i].pte_num; - vmc->pmap[i].old_pte = pte[entry]; - pte[entry] = page | PG_V | PG_RW | PG_U; - pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + if (pae_mode) { + pte_pae = (uint64_t *)vm86paddr; + for (i = 0; i < vmc->npages; i++) { + page = vtophys(vmc->pmap[i].kva & PG_FRAME_PAE); + entry = vmc->pmap[i].pte_num; + vmc->pmap[i].old_pte = pte_pae[entry]; + pte_pae[entry] = page | PG_V | PG_RW | PG_U; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } + } else { + pte_nopae = (uint32_t *)vm86paddr; + for (i = 0; i < vmc->npages; i++) { + page = vtophys(vmc->pmap[i].kva & PG_FRAME_NOPAE); + entry = vmc->pmap[i].pte_num; + vmc->pmap[i].old_pte = pte_nopae[entry]; + pte_nopae[entry] = page | PG_V | PG_RW | PG_U; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } } vmf->vmf_trapno = intnum; @@ -666,10 +761,18 @@ retval = p(vmf); critical_exit(); - for (i = 0; i < vmc->npages; i++) { - entry = vmc->pmap[i].pte_num; - pte[entry] = vmc->pmap[i].old_pte; - pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + if (pae_mode) { + for (i = 0; i < vmc->npages; i++) { + entry = vmc->pmap[i].pte_num; + pte_pae[entry] = vmc->pmap[i].old_pte; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } + } else { + for (i = 0; i < vmc->npages; i++) { + entry = vmc->pmap[i].pte_num; + pte_nopae[entry] = vmc->pmap[i].old_pte; + pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva); + } } mtx_unlock(&vm86_lock); Index: head/sys/i386/i386/vm86bios.s =================================================================== --- head/sys/i386/i386/vm86bios.s +++ head/sys/i386/i386/vm86bios.s @@ -101,8 +101,12 @@ movl %cr3,%eax pushl %eax /* save address space */ - movl IdlePTD,%ecx /* va (and pa) of Idle PTD */ - movl %ecx,%ebx + cmpb $0,pae_mode + jne 2f + movl IdlePTD_nopae,%ecx /* va (and pa) of Idle PTD */ + jmp 3f +2: movl IdlePTD_pae,%ecx +3: movl %ecx,%ebx movl 0(%ebx),%eax pushl %eax /* old ptde != 0 when booting */ pushl %ebx /* keep for reuse */ @@ -112,10 +116,10 @@ movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */ movl %eax,0(%ebx) /* ... install as PTD entry 0 */ -#if defined(PAE) || defined(PAE_TABLES) + cmpb $0,pae_mode + je 4f movl IdlePDPT,%ecx -#endif - movl %ecx,%cr3 /* new page tables */ +4: movl %ecx,%cr3 /* new page tables */ movl SCR_VMFRAME(%edx),%esp /* switch to new stack */ pushl %esp Index: head/sys/i386/i386/vm_machdep.c =================================================================== --- head/sys/i386/i386/vm_machdep.c +++ head/sys/i386/i386/vm_machdep.c @@ -230,11 +230,7 @@ * Set registers for trampoline to user mode. Leave space for the * return address on stack. These are the kernel mode register values. 
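The vm_machdep.c hunk just below drops the PAE/PAE_TABLES #ifdef around pcb_cr3 in favor of pmap_get_cr3(). A hedged sketch of the two per-mode bodies that presumably back that accessor, inferred from the lines being removed; the names are illustrative, since the real definitions come out of the shared pmap.c via the PMTYPE prefix:

	/* Illustrative only: the PAE flavour loads the PDPT, non-PAE the page directory. */
	static u_int
	pmap_pae_get_cr3(pmap_t pmap)
	{

		return ((u_int)vtophys(pmap->pm_pdpt));
	}

	static u_int
	pmap_nopae_get_cr3(pmap_t pmap)
	{

		return ((u_int)vtophys(pmap->pm_pdir));
	}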
*/ -#if defined(PAE) || defined(PAE_TABLES) - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt); -#else - pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir); -#endif + pcb2->pcb_cr3 = pmap_get_cr3(vmspace_pmap(p2->p_vmspace)); pcb2->pcb_edi = 0; pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */ pcb2->pcb_ebp = 0; @@ -572,34 +568,10 @@ void sf_buf_map(struct sf_buf *sf, int flags) { - pt_entry_t opte, *ptep; - /* - * Update the sf_buf's virtual-to-physical mapping, flushing the - * virtual address from the TLB. Since the reference count for - * the sf_buf's old mapping was zero, that mapping is not - * currently in use. Consequently, there is no need to exchange - * the old and new PTEs atomically, even under PAE. - */ - ptep = vtopte(sf->kva); - opte = *ptep; - *ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V | - pmap_cache_bits(kernel_pmap, sf->m->md.pat_mode, 0); - - /* - * Avoid unnecessary TLB invalidations: If the sf_buf's old - * virtual-to-physical mapping was not used, then any processor - * that has invalidated the sf_buf's virtual address from its TLB - * since the last used mapping need not invalidate again. - */ + pmap_sf_buf_map(sf); #ifdef SMP - if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - CPU_ZERO(&sf->cpumask); - sf_buf_shootdown(sf, flags); -#else - if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - pmap_invalidate_page(kernel_pmap, sf->kva); #endif } Index: head/sys/i386/include/md_var.h =================================================================== --- head/sys/i386/include/md_var.h +++ head/sys/i386/include/md_var.h @@ -69,6 +69,8 @@ void fill_based_sd(struct segment_descriptor *sdp, uint32_t base); void i686_pagezero(void *addr); void sse2_pagezero(void *addr); +int minidumpsys_nopae(struct dumperinfo *); +int minidumpsys_pae(struct dumperinfo *); void init_AMD_Elan_sc520(void); vm_paddr_t kvtop(void *addr); void panicifcpuunsupported(void); Index: head/sys/i386/include/param.h =================================================================== --- head/sys/i386/include/param.h +++ head/sys/i386/include/param.h @@ -88,25 +88,23 @@ #define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT) #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ -#define PAGE_SIZE (1< -#if defined(PAE) || defined(PAE_TABLES) - -typedef uint64_t pdpt_entry_t; -typedef uint64_t pd_entry_t; -typedef uint64_t pt_entry_t; - -#define PTESHIFT (3) -#define PDESHIFT (3) - -#else - -typedef uint32_t pd_entry_t; -typedef uint32_t pt_entry_t; - -#define PTESHIFT (2) -#define PDESHIFT (2) - -#endif - /* * Address of current address space page table maps and directories. */ #ifdef _KERNEL -#include -extern pt_entry_t PTmap[]; -extern pd_entry_t PTD[]; -extern pd_entry_t PTDpde[]; - -#if defined(PAE) || defined(PAE_TABLES) -extern pdpt_entry_t *IdlePDPT; -#endif -extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */ - /* - * Translate a virtual address to the kernel virtual address of its page table - * entry (PTE). This can be used recursively. If the address of a PTE as - * previously returned by this macro is itself given as the argument, then the - * address of the page directory entry (PDE) that maps the PTE will be - * returned. - * - * This macro may be used before pmap_bootstrap() is called. - */ -#define vtopte(va) (PTmap + i386_btop(va)) - -/* * Translate a virtual address to its physical address. * * This macro may be used before pmap_bootstrap() is called. 
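With the inline pmap_kextract() removed from pmap.h, the vtophys() macro kept below now resolves to an out-of-line function. Its pmap_base.c wrapper presumably forwards through the method table exactly like the other wrappers earlier in this patch; a sketch under that assumption:

	vm_paddr_t
	pmap_kextract(vm_offset_t va)
	{

		return (pmap_methods_ptr->pm_kextract(va));
	}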
*/ #define vtophys(va) pmap_kextract((vm_offset_t)(va)) -/* - * KPTmap is a linear mapping of the kernel page table. It differs from the - * recursive mapping in two ways: (1) it only provides access to kernel page - * table pages, and not user page table pages, and (2) it provides access to - * a kernel page table page after the corresponding virtual addresses have - * been promoted to a 2/4MB page mapping. - * - * KPTmap is first initialized by pmap_cold() to support just NPKT page table - * pages. Later, it is reinitialized by pmap_bootstrap() to allow for - * expansion of the kernel page table. - */ -extern pt_entry_t *KPTmap; - -#if (defined(PAE) || defined(PAE_TABLES)) - -#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) -#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) -#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) -#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) -#define pte_load(ptep) atomic_load_acq_64_i586(ptep) - -extern pt_entry_t pg_nx; - -#else /* !(PAE || PAE_TABLES) */ - -#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) -#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) -#define pte_load_clear(ptep) atomic_swap_int(ptep, 0) -#define pte_store(ptep, pte) do { \ - *(u_int *)(ptep) = (u_int)(pte); \ -} while (0) -#define pte_load(ptep) atomic_load_acq_int(ptep) - -#endif /* !(PAE || PAE_TABLES) */ - #define pte_clear(ptep) pte_store(ptep, 0) #define pde_store(pdep, pde) pte_store(pdep, pde) -/* - * Extract from the kernel page table the physical address that is mapped by - * the given virtual address "va". - * - * This function may be used before pmap_bootstrap() is called. - */ -static __inline vm_paddr_t -pmap_kextract(vm_offset_t va) -{ - vm_paddr_t pa; - - if ((pa = pte_load(&PTD[va >> PDRSHIFT])) & PG_PS) { - pa = (pa & PG_PS_FRAME) | (va & PDRMASK); - } else { - /* - * Beware of a concurrent promotion that changes the PDE at - * this point! For example, vtopte() must not be used to - * access the PTE because it would use the new PDE. It is, - * however, safe to use the old PDE because the page table - * page is preserved by the promotion. - */ - pa = KPTmap[i386_btop(va)]; - pa = (pa & PG_FRAME) | (va & PAGE_MASK); - } - return (pa); -} - #endif /* _KERNEL */ /* @@ -302,20 +166,30 @@ int pat_mode; }; +#define PMAP_EXTERN_FIELDS \ + cpuset_t pm_active; /* active on cpus */ \ + struct mtx pm_mtx; \ + struct pmap_statistics pm_stats; /* pmap statistics */ + +struct pmap_KBI { + PMAP_EXTERN_FIELDS + int32_t pm_fill[32]; +}; + +#ifdef PMTYPE struct pmap { - struct mtx pm_mtx; + PMAP_EXTERN_FIELDS pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpuset_t pm_active; /* active on cpus */ - struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ -#if defined(PAE) || defined(PAE_TABLES) pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer table */ -#endif struct vm_radix pm_root; /* spare page table pages */ vm_page_t pm_ptdpg[NPGPTD]; }; +#else +#define pmap pmap_KBI +#endif typedef struct pmap *pmap_t; @@ -360,8 +234,6 @@ #ifdef _KERNEL -extern caddr_t CADDR3; -extern pt_entry_t *CMAP3; extern vm_paddr_t phys_avail[]; extern vm_paddr_t dump_avail[]; extern char *ptvmmap; /* poor name! 
*/ @@ -372,27 +244,45 @@ #define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0) #define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz)) +struct sf_buf; + /* * Only the following functions or macros may be used before pmap_bootstrap() * is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and * vtopte(). */ void pmap_activate_boot(pmap_t pmap); +void pmap_basemem_setup(u_int basemem); +void *pmap_bios16_enter(void); +void pmap_bios16_leave(void *handle); void pmap_bootstrap(vm_paddr_t); int pmap_cache_bits(pmap_t, int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); +caddr_t pmap_cmap3(vm_paddr_t pa, u_int pte_bits); +void pmap_cp_slow0_map(vm_offset_t kaddr, int plen, vm_page_t *ma); +void pmap_flush_page(vm_page_t m); +u_int pmap_get_kcr3(void); +u_int pmap_get_cr3(pmap_t); +vm_offset_t pmap_get_map_low(void); +vm_offset_t pmap_get_vm_maxuser_address(void); void pmap_init_pat(void); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); void *pmap_kenter_temporary(vm_paddr_t pa, int i); +vm_paddr_t pmap_kextract(vm_offset_t va); void pmap_kremove(vm_offset_t); +void pmap_ksetrw(vm_offset_t va); void *pmap_mapbios(vm_paddr_t, vm_size_t); void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); +vm_paddr_t pmap_pg_frame(vm_paddr_t pa); bool pmap_ps_enabled(pmap_t pmap); +void pmap_remap_lower(bool); +void pmap_remap_lowptdi(bool); +void pmap_set_nx(void); +void pmap_sf_buf_map(struct sf_buf *sf); void pmap_unmapdev(vm_offset_t, vm_size_t); -pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2; void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_all(pmap_t); @@ -404,6 +294,13 @@ void pmap_trm_free(void *addr, size_t size); void invltlb_glob(void); + +struct thread; + +extern int pae_mode; +extern int i386_pmap_VM_NFREEORDER; +extern int i386_pmap_VM_LEVEL_0_ORDER; +extern int i386_pmap_PDRSHIFT; #endif /* _KERNEL */ Index: head/sys/i386/include/pmap_base.h =================================================================== --- head/sys/i386/include/pmap_base.h +++ head/sys/i386/include/pmap_base.h @@ -0,0 +1,124 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_BASE_H_ +#define _MACHINE_PMAP_BASE_H_ + +struct pmap_methods { + void (*pm_ksetrw)(vm_offset_t); + void (*pm_remap_lower)(bool); + void (*pm_remap_lowptdi)(bool); + void (*pm_align_superpage)(vm_object_t object, vm_ooffset_t offset, + vm_offset_t *addr, vm_size_t size); + vm_offset_t (*pm_quick_enter_page)(vm_page_t m); + void (*pm_quick_remove_page)(vm_offset_t addr); + void *(*pm_trm_alloc)(size_t size, int flags); + void (*pm_trm_free)(void *addr, size_t size); + vm_offset_t (*pm_get_map_low)(void); + vm_offset_t (*pm_get_vm_maxuser_address)(void); + vm_paddr_t (*pm_kextract)(vm_offset_t va); + vm_paddr_t (*pm_pg_frame)(vm_paddr_t pa); + void (*pm_sf_buf_map)(struct sf_buf *sf); + void (*pm_cp_slow0_map)(vm_offset_t kaddr, int plen, vm_page_t *ma); + u_int (*pm_get_kcr3)(void); + u_int (*pm_get_cr3)(pmap_t); + caddr_t (*pm_cmap3)(vm_paddr_t pa, u_int pte_flags); + void (*pm_basemem_setup)(u_int basemem); + void (*pm_set_nx)(void); + void *(*pm_bios16_enter)(void); + void (*pm_bios16_leave)(void *handle); + void (*pm_bootstrap)(vm_paddr_t firstaddr); + boolean_t (*pm_is_valid_memattr)(pmap_t, vm_memattr_t); + int (*pm_cache_bits)(pmap_t, int, boolean_t); + bool (*pm_ps_enabled)(pmap_t); + void (*pm_pinit0)(pmap_t); + int (*pm_pinit)(pmap_t); + void (*pm_activate)(struct thread *); + void (*pm_activate_boot)(pmap_t); + void (*pm_advise)(pmap_t, vm_offset_t, vm_offset_t, int); + void (*pm_clear_modify)(vm_page_t); + int (*pm_change_attr)(vm_offset_t, vm_size_t, int); + int (*pm_mincore)(pmap_t, vm_offset_t, vm_paddr_t *); + void (*pm_copy)(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); + void (*pm_copy_page)(vm_page_t, vm_page_t); + void (*pm_copy_pages)(vm_page_t [], vm_offset_t, vm_page_t [], + vm_offset_t, int); + void (*pm_zero_page)(vm_page_t); + void (*pm_zero_page_area)(vm_page_t, int, int); + int (*pm_enter)(pmap_t, vm_offset_t, vm_page_t, vm_prot_t, u_int, + int8_t); + void (*pm_enter_object)(pmap_t, vm_offset_t, vm_offset_t, + vm_page_t, vm_prot_t); + void (*pm_enter_quick)(pmap_t, vm_offset_t, vm_page_t, vm_prot_t); + void *(*pm_kenter_temporary)(vm_paddr_t pa, int); + void (*pm_object_init_pt)(pmap_t, vm_offset_t, vm_object_t, + vm_pindex_t, vm_size_t); + void (*pm_unwire)(pmap_t, vm_offset_t, vm_offset_t); + boolean_t (*pm_page_exists_quick)(pmap_t, vm_page_t); + int (*pm_page_wired_mappings)(vm_page_t); + boolean_t (*pm_page_is_mapped)(vm_page_t); + void (*pm_remove_pages)(pmap_t); + boolean_t (*pm_is_modified)(vm_page_t); + boolean_t (*pm_is_prefaultable)(pmap_t, vm_offset_t); + boolean_t (*pm_is_referenced)(vm_page_t); + void (*pm_remove_write)(vm_page_t); + int (*pm_ts_referenced)(vm_page_t); + void *(*pm_mapdev_attr)(vm_paddr_t, vm_size_t, int); + void (*pm_unmapdev)(vm_offset_t, vm_size_t); + void (*pm_page_set_memattr)(vm_page_t, vm_memattr_t); + vm_paddr_t (*pm_extract)(pmap_t, vm_offset_t); + vm_page_t (*pm_extract_and_hold)(pmap_t, vm_offset_t, vm_prot_t); + vm_offset_t 
(*pm_map)(vm_offset_t *, vm_paddr_t, vm_paddr_t, int); + void (*pm_qenter)(vm_offset_t sva, vm_page_t *, int); + void (*pm_qremove)(vm_offset_t, int); + void (*pm_release)(pmap_t); + void (*pm_protect)(pmap_t, vm_offset_t, vm_offset_t, vm_prot_t); + void (*pm_remove)(pmap_t, vm_offset_t, vm_offset_t); + void (*pm_remove_all)(vm_page_t); + void (*pm_init)(void); + void (*pm_init_pat)(void); + void (*pm_growkernel)(vm_offset_t); + void (*pm_invalidate_page)(pmap_t, vm_offset_t); + void (*pm_invalidate_range)(pmap_t, vm_offset_t, vm_offset_t); + void (*pm_invalidate_all)(pmap_t); + void (*pm_invalidate_cache)(void); + void (*pm_flush_page)(vm_page_t); + void (*pm_kenter)(vm_offset_t, vm_paddr_t); + void (*pm_kremove)(vm_offset_t); +}; + +void pmap_cold(void); +void pmap_pae_cold(void); +void pmap_nopae_cold(void); + +#endif Index: head/sys/i386/include/pmap_nopae.h =================================================================== --- head/sys/i386/include/pmap_nopae.h +++ head/sys/i386/include/pmap_nopae.h @@ -0,0 +1,100 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Portions of this software were developed by + * Konstantin Belousov under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. 
+ * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_NOPAE_H +#define _MACHINE_PMAP_NOPAE_H + +#define NTRPPTD 1 +#define LOWPTDI 1 +#define KERNPTDI 2 + +#define NPGPTD 1 +#define NPGPTD_SHIFT 10 +#undef PDRSHIFT +#define PDRSHIFT PDRSHIFT_NOPAE +#undef NBPDR +#define NBPDR (1 << PDRSHIFT_NOPAE) /* bytes/page dir */ + +#define PG_FRAME PG_FRAME_NOPAE +#define PG_PS_FRAME PG_PS_FRAME_NOPAE + +#define KVA_PAGES (256*4) + +#ifndef NKPT +#define NKPT 30 +#endif + +typedef uint32_t pd_entry_t; +typedef uint32_t pt_entry_t; +typedef uint32_t pdpt_entry_t; /* Only to keep struct pmap layout. */ + +#define PTESHIFT (2) +#define PDESHIFT (2) + +#define pde_cmpset(pdep, old, new) atomic_cmpset_int(pdep, old, new) +#define pte_load_store(ptep, pte) atomic_swap_int(ptep, pte) +#define pte_load_clear(ptep) atomic_swap_int(ptep, 0) +#define pte_store(ptep, pte) do { \ + *(u_int *)(ptep) = (u_int)(pte); \ +} while (0) +#define pte_load(ptep) atomic_load_int(ptep) + +extern pt_entry_t PTmap[]; +extern pd_entry_t PTD[]; +extern pd_entry_t PTDpde[]; +extern pd_entry_t *IdlePTD_nopae; +extern pt_entry_t *KPTmap_nopae; + +struct pmap; +pt_entry_t *__CONCAT(PMTYPE, pmap_pte)(struct pmap *, vm_offset_t) __pure2; + +#endif Index: head/sys/i386/include/pmap_pae.h =================================================================== --- head/sys/i386/include/pmap_pae.h +++ head/sys/i386/include/pmap_pae.h @@ -0,0 +1,123 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Portions of this software were developed by + * Konstantin Belousov under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Derived from hp300 version by Mike Hibler, this version by William + * Jolitz uses a recursive map [a pde points to the page directory] to + * map the page tables using the pagetables themselves. This is done to + * reduce the impact on kernel virtual memory for lots of sparse address + * space, and to reduce the cost of memory to each process. + * + * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 + * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 + * $FreeBSD$ + */ + +#ifndef _MACHINE_PMAP_PAE_H +#define _MACHINE_PMAP_PAE_H + +#define NTRPPTD 2 /* Number of PTDs for trampoline + mapping */ +#define LOWPTDI 2 /* low memory map pde */ +#define KERNPTDI 4 /* start of kernel text pde */ + +#define NPGPTD 4 /* Num of pages for page directory */ +#define NPGPTD_SHIFT 9 +#undef PDRSHIFT +#define PDRSHIFT PDRSHIFT_PAE +#undef NBPDR +#define NBPDR (1 << PDRSHIFT_PAE) /* bytes/page dir */ + +#define PG_FRAME PG_FRAME_PAE +#define PG_PS_FRAME PG_PS_FRAME_PAE + +/* + * Size of Kernel address space. This is the number of page table pages + * (4MB each) to use for the kernel. 256 pages == 1 Gigabyte. + * This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc). + * For PAE, the page table page unit size is 2MB. This means that 512 pages + * is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE. + */ +#define KVA_PAGES (512*4) + +/* + * The initial number of kernel page table pages that are constructed + * by pmap_cold() must be sufficient to map vm_page_array. That number can + * be calculated as follows: + * max_phys / PAGE_SIZE * sizeof(struct vm_page) / NBPDR + * PAE: max_phys 16G, sizeof(vm_page) 76, NBPDR 2M, 152 page table pages. + * PAE_TABLES: max_phys 4G, sizeof(vm_page) 68, NBPDR 2M, 36 page table pages. + * Non-PAE: max_phys 4G, sizeof(vm_page) 68, NBPDR 4M, 18 page table pages. + */ +#ifndef NKPT +#define NKPT 240 +#endif + +typedef uint64_t pdpt_entry_t; +typedef uint64_t pd_entry_t; +typedef uint64_t pt_entry_t; + +#define PTESHIFT (3) +#define PDESHIFT (3) + +#define pde_cmpset(pdep, old, new) atomic_cmpset_64_i586(pdep, old, new) +#define pte_load_store(ptep, pte) atomic_swap_64_i586(ptep, pte) +#define pte_load_clear(ptep) atomic_swap_64_i586(ptep, 0) +#define pte_store(ptep, pte) atomic_store_rel_64_i586(ptep, pte) +#define pte_load(ptep) atomic_load_acq_64_i586(ptep) + +extern pdpt_entry_t *IdlePDPT; +extern pt_entry_t pg_nx; +extern pd_entry_t *IdlePTD_pae; /* physical address of "Idle" state directory */ + +/* + * KPTmap is a linear mapping of the kernel page table. It differs from the + * recursive mapping in two ways: (1) it only provides access to kernel page + * table pages, and not user page table pages, and (2) it provides access to + * a kernel page table page after the corresponding virtual addresses have + * been promoted to a 2/4MB page mapping. + * + * KPTmap is first initialized by pmap_cold() to support just NPKT page table + * pages. 
Later, it is reinitialized by pmap_bootstrap() to allow for + * expansion of the kernel page table. + */ +extern pt_entry_t *KPTmap_pae; + +#endif Index: head/sys/i386/include/vm86.h =================================================================== --- head/sys/i386/include/vm86.h +++ head/sys/i386/include/vm86.h @@ -111,7 +111,7 @@ int flags; int pte_num; vm_offset_t kva; - u_int old_pte; + uint64_t old_pte; } pmap[VM86_PMAPSIZE]; }; Index: head/sys/i386/include/vmparam.h =================================================================== --- head/sys/i386/include/vmparam.h +++ head/sys/i386/include/vmparam.h @@ -95,25 +95,32 @@ #define VM_FREEPOOL_DIRECT 0 /* - * Create two free page lists: VM_FREELIST_DEFAULT is for physical - * pages that are above the largest physical address that is - * accessible by ISA DMA and VM_FREELIST_LOWMEM is for physical pages - * that are below that address. + * Create up to three free page lists: VM_FREELIST_DMA32 is for physical pages + * that have physical addresses below 4G but are not accessible by ISA DMA, + * and VM_FREELIST_ISADMA is for physical pages that are accessible by ISA + * DMA. */ -#define VM_NFREELIST 2 +#define VM_NFREELIST 3 #define VM_FREELIST_DEFAULT 0 -#define VM_FREELIST_LOWMEM 1 +#define VM_FREELIST_DMA32 1 +#define VM_FREELIST_LOWMEM 2 #define VM_LOWMEM_BOUNDARY (16 << 20) /* 16MB ISA DMA limit */ /* + * Always create DMA32 freelist if there is any memory above 4G. + * Bounce dma is extremely fragile and simultaneously intensively + * used. + */ +#define VM_DMA32_NPAGES_THRESHOLD 1 + +/* * The largest allocation size is 2MB under PAE and 4MB otherwise. */ -#ifdef PAE -#define VM_NFREEORDER 10 -#else -#define VM_NFREEORDER 11 -#endif +#define VM_NFREEORDER_PAE 10 +#define VM_NFREEORDER_NOPAE 11 +#define VM_NFREEORDER_MAX VM_NFREEORDER_NOPAE +#define VM_NFREEORDER i386_pmap_VM_NFREEORDER /* * Enable superpage reservations: 1 level. @@ -127,18 +134,19 @@ * used, and 1024 pages otherwise. */ #ifndef VM_LEVEL_0_ORDER -#if defined(PAE) || defined(PAE_TABLES) -#define VM_LEVEL_0_ORDER 9 +#define VM_LEVEL_0_ORDER_PAE 9 +#define VM_LEVEL_0_ORDER_NOPAE 10 +#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER_NOPAE +#define VM_LEVEL_0_ORDER i386_pmap_VM_LEVEL_0_ORDER #else -#define VM_LEVEL_0_ORDER 10 +#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER #endif -#endif /* * Kernel physical load address. */ #ifndef KERNLOAD -#define KERNLOAD (KERNPTDI << PDRSHIFT) +#define KERNLOAD (8 * 1024 * 1024) #endif /* !defined(KERNLOAD) */ /* @@ -148,7 +156,7 @@ * messy at times, but hey, we'll do anything to save a page :-) */ -#define VM_MAX_KERNEL_ADDRESS VADDR(PTDPTDI, 0) +#define VM_MAX_KERNEL_ADDRESS (0xffffffffU - 16 * 1024 * 1024 + 1) #define VM_MIN_KERNEL_ADDRESS 0 @@ -157,7 +165,7 @@ #define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0) -#define VM_MAXUSER_ADDRESS VADDR(TRPTDI, 0) +#define VM_MAXUSER_ADDRESS (0xffffffff - 4 * 1024 * 1024 + 1) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE @@ -168,12 +176,13 @@ #define PMAP_TRM_MIN_ADDRESS VM_MAXUSER_ADDRESS #define PMAP_TRM_MAX_ADDRESS 0xffffffff -#define PMAP_MAP_LOW VADDR(LOWPTDI, 0) +#define PMAP_MAP_LOW (4 * 1024 * 1024) /* * KVA layout. The unit of the system allocation is single PDE, which * represents NBPDR bytes, aligned to NBPDR. NBPDR is 4M for non-PAE - * page tables, and 2M for PAE. Addresses below are shown for non-PAE. 
+ * page tables, and 2M for PAE, so PAE mode requires twice as many PTDs + * to create the same memory map as non-PAE. * * 0x00000000 - 0x003fffff Transient identity map of low memory (0-4M), * normally disabled to catch NULL derefs. @@ -193,7 +202,7 @@ * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE -#define VM_KMEM_SIZE_SCALE (3) +#define VM_KMEM_SIZE_SCALE (1) #endif /* Index: head/sys/i386/pci/pci_cfgreg.c =================================================================== --- head/sys/i386/pci/pci_cfgreg.c +++ head/sys/i386/pci/pci_cfgreg.c @@ -490,15 +490,13 @@ if (minbus != 0) return (0); -#ifndef PAE - if (base >= 0x100000000) { + if (!pae_mode && base >= 0x100000000) { if (bootverbose) printf( "PCI: Memory Mapped PCI configuration area base 0x%jx too high\n", (uintmax_t)base); return (0); } -#endif if (bootverbose) printf("PCIe: Memory Mapped configuration base @ 0x%jx\n", Index: head/sys/x86/acpica/acpi_wakeup.c =================================================================== --- head/sys/x86/acpica/acpi_wakeup.c +++ head/sys/x86/acpica/acpi_wakeup.c @@ -194,8 +194,7 @@ * cpususpend_handler() and we will release them soon. Then each * will invalidate its TLB. */ - PTD[KPTDI] = 0; - invltlb_glob(); + pmap_remap_lowptdi(false); #endif /* restore the warmstart vector */ @@ -277,7 +276,7 @@ * be careful to use the kernel map (PTD[0] is for curthread * which may be a user thread in deprecated APIs). */ - PTD[KPTDI] = PTD[LOWPTDI]; + pmap_remap_lowptdi(true); #endif /* Call ACPICA to enter the desired sleep state */ @@ -449,12 +448,7 @@ /* Save pointers to some global data. */ WAKECODE_FIXUP(wakeup_ret, void *, resumectx); #ifndef __amd64__ -#if defined(PAE) || defined(PAE_TABLES) - WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdpt)); -#else - WAKECODE_FIXUP(wakeup_cr3, register_t, vtophys(kernel_pmap->pm_pdir)); -#endif - + WAKECODE_FIXUP(wakeup_cr3, register_t, pmap_get_kcr3()); #else /* __amd64__ */ /* Create the initial 1GB replicated page tables */ for (i = 0; i < 512; i++) { Index: head/sys/x86/include/_types.h =================================================================== --- head/sys/x86/include/_types.h +++ head/sys/x86/include/_types.h @@ -135,11 +135,7 @@ #else typedef __uint32_t __u_register_t; typedef __uint32_t __vm_offset_t; -#ifdef PAE typedef __uint64_t __vm_paddr_t; -#else -typedef __uint32_t __vm_paddr_t; -#endif typedef __uint32_t __vm_size_t; #endif typedef int ___wchar_t; Index: head/sys/x86/include/x86_var.h =================================================================== --- head/sys/x86/include/x86_var.h +++ head/sys/x86/include/x86_var.h @@ -102,23 +102,10 @@ */ typedef void alias_for_inthand_t(void); -/* - * Returns the maximum physical address that can be used with the - * current system. 
- */ -static __inline vm_paddr_t -cpu_getmaxphyaddr(void) -{ -#if defined(__i386__) && !defined(PAE) - return (0xffffffff); -#else - return ((1ULL << cpu_maxphyaddr) - 1); -#endif -} - bool acpi_get_fadt_bootflags(uint16_t *flagsp); void *alloc_fpusave(int flags); void busdma_swi(void); +vm_paddr_t cpu_getmaxphyaddr(void); bool cpu_mwait_usable(void); void cpu_probe_amdc1e(void); void cpu_setregs(void); Index: head/sys/x86/x86/identcpu.c =================================================================== --- head/sys/x86/x86/identcpu.c +++ head/sys/x86/x86/identcpu.c @@ -53,6 +53,9 @@ #include #include +#include +#include + #include #include #include @@ -2532,4 +2535,19 @@ if (*hv_vendor) printf("Hypervisor: Origin = \"%s\"\n", hv_vendor); +} + +/* + * Returns the maximum physical address that can be used with the + * current system. + */ +vm_paddr_t +cpu_getmaxphyaddr(void) +{ + +#if defined(__i386__) + if (!pae_mode) + return (0xffffffff); +#endif + return ((1ULL << cpu_maxphyaddr) - 1); }
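cpu_getmaxphyaddr() now tests pae_mode at run time instead of being fixed at compile time, mirroring the pmap_cold()/pmap_methods_ptr selection earlier in the patch. A small self-contained sketch of that boot-time method-table pattern; all demo_* names are illustrative and not part of the change:

	#include <stdint.h>
	#include <stdio.h>

	struct demo_methods {
		uint64_t (*dm_max_paddr)(void);
	};

	static uint64_t
	demo_max_paddr_pae(void)
	{
		return ((1ULL << 36) - 1);	/* example 36-bit physical limit */
	}

	static uint64_t
	demo_max_paddr_nopae(void)
	{
		return (0xffffffffULL);		/* 4G - 1 without PAE */
	}

	static const struct demo_methods demo_pae = { demo_max_paddr_pae };
	static const struct demo_methods demo_nopae = { demo_max_paddr_nopae };
	static const struct demo_methods *demo_ptr;

	/* One-time selection, analogous to pmap_cold() probing CPUID_PAE. */
	static void
	demo_cold(int cpu_has_pae)
	{
		demo_ptr = cpu_has_pae ? &demo_pae : &demo_nopae;
	}

	int
	main(void)
	{
		demo_cold(1);
		printf("max paddr: %#llx\n",
		    (unsigned long long)demo_ptr->dm_max_paddr());
		return (0);
	}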