sys/amd64/amd64/pmap.c
[... 523 lines not shown ...]
#define	PMAP_ADDRESS_IN_LARGEMAP(va)	((va) >= LARGEMAP_MIN_ADDRESS && \
    (va) < LARGEMAP_MIN_ADDRESS + NBPML4 * (u_long)lm_ents)
int pmap_pcid_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
int invpcid_works = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
    "Is the invpcid instruction available ?");
+int pmap_pcid_invlpg_workaround = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_invlpg_workaround, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+    &pmap_pcid_invlpg_workaround, 0,
+    "Enable small core PCID/INVLPG workaround");
emaste: Should have a description, something like "Use INVPCID_CTXGLOB to work around small core INVLPG microarchitectural bug" or "Enable small core INVLPG workaround" perhaps? I wonder about the name too, maybe pcid_invlpg_workaround instead, or something like force_invpcid? pcid_invlpg_bug might give the impression that this is just reporting whether the bug is present, I think. @markj, thoughts?
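(Since the knob is declared CTLFLAG_RDTUN | CTLFLAG_NOFETCH, it is a boot-time tunable rather than a writable sysctl; presumably it would be set from loader.conf, e.g. vm.pmap.pcid_invlpg_workaround=1, with the kernel fetching the value explicitly during boot as is done for pti. The tunable name here is inferred from the SYSCTL_INT declaration above.)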
int __read_frequently pti = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &pti, 0,
    "Page Table Isolation enabled");
static vm_object_t pti_obj;
static pml4_entry_t *pti_pml4;
static vm_pindex_t pti_pg_idx;
[... 2,252 lines not shown ...]
pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
	KASSERT(pmap->pm_type == PT_X86,
	    ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type));
	PG_G = pmap_global_bit(pmap);
	if ((newpde & PG_PS) == 0)
		/* Demotion: flush a specific 2MB page mapping. */
-		invlpg(va);
+		pmap_invlpg(pmap, va);
	else if ((newpde & PG_G) == 0)
		/*
		 * Promotion: flush every 4KB page mapping from the TLB
		 * because there are too many to flush individually.
		 */
		invltlb();
	else {
		/*
[... 322 lines not shown ...]
DEFINE_IFUNC(static, void, pmap_invalidate_page_cb, (pmap_t, vm_offset_t))
	return (pmap_invalidate_page_nopcid_cb);
}

static void
pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
    vm_offset_t addr2 __unused)
{
	if (pmap == kernel_pmap) {
-		invlpg(va);
+		pmap_invlpg(kernel_pmap, va);
	} else if (pmap == PCPU_GET(curpmap)) {
		invlpg(va);
		pmap_invalidate_page_cb(pmap, va);
	}
}
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
[... 74 lines not shown ...]
}
static void
pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (pmap == kernel_pmap) {
-		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
+		if (PCPU_GET(pcid_invlpg_bug)) {
+			struct invpcid_descr d = { 0 };
+
+			invpcid(&d, INVPCID_CTXGLOB);
+		} else {
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				pmap_invlpg(kernel_pmap, addr);
+		}
	} else if (pmap == PCPU_GET(curpmap)) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		pmap_invalidate_range_cb(pmap, sva, eva);
	}
}
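The pmap_invlpg() helper that the new calls rely on is defined outside the hunks shown in this view (presumably in a header such as amd64/include/pmap.h). As a rough sketch only, inferred from the open-coded check in pmap_invalidate_range_curcpu_cb() above and the pcid_invlpg_bug PCPU field it reads, the helper would look something like:

	/*
	 * Sketch (not part of this changeset view): invalidate one kernel
	 * TLB entry, falling back to a full flush including global entries
	 * on cores where INVLPG mishandles PCID-tagged global mappings.
	 */
	static __inline void
	pmap_invlpg(pmap_t pmap, vm_offset_t va)
	{
		if (pmap == kernel_pmap && PCPU_GET(pcid_invlpg_bug)) {
			struct invpcid_descr d = { 0 };

			invpcid(&d, INVPCID_CTXGLOB);
		} else {
			invlpg(va);
		}
	}

This would also explain why only the kernel_pmap paths above were switched to pmap_invlpg() while the curpmap paths keep plain invlpg(): user mappings never carry PG_G, and the erratum being worked around concerns global mappings.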
void
[... 521 lines not shown ...]
	pte_bits = pmap_cache_bits(kernel_pmap, mattr, 0) | X86_PG_RW |
	    X86_PG_V;
	error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK,
	    &vaddr);
	KASSERT(error == 0, ("vmem_alloc failed: %d", error));
	pte = vtopte(vaddr);
	for (; spa < epa; spa += PAGE_SIZE) {
		sched_pin();
		pte_store(pte, spa | pte_bits);
		invlpg(vaddr);
alc (done): Because the virtual address being mapped here was dynamically allocated, I don't think that we should make the assumption that a stale TLB entry for vaddr didn't have PG_G set. So, I would use pmap_invlpg() here.
		/* XXXKIB atomic inside flush_cache_range are excessive */
		pmap_flush_cache_range(vaddr, vaddr + PAGE_SIZE);
		sched_unpin();
	}
	vmem_free(kernel_arena, vaddr, PAGE_SIZE);
}

/*
[... 3,891 lines not shown ...]
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;

	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
-	invlpg(va);
+	pmap_invlpg(kernel_pmap, va);
	return ((void *)crashdumpmap);
}
/*
 * This code maps large physical mmap regions into the
 * processor address space. Note that some shortcuts
 * are taken, but the code works.
 */
[... 2,686 lines not shown ...]
	if (paddr >= dmaplimit) {
			 */
			pmap_qenter(vaddr[i], &page[i], 1);
		} else {
			pte = vtopte(vaddr[i]);
			cache_bits = pmap_cache_bits(kernel_pmap,
			    page[i]->md.pat_mode, 0);
			pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
			    cache_bits);
-			invlpg(vaddr[i]);
+			pmap_invlpg(kernel_pmap, vaddr[i]);
		}
	}
	return (needs_mapping);
}
void
[... 20 lines not shown ...]
{
	vm_paddr_t paddr;

	paddr = VM_PAGE_TO_PHYS(m);
	if (paddr < dmaplimit)
		return (PHYS_TO_DMAP(paddr));
	mtx_lock_spin(&qframe_mtx);
	KASSERT(*vtopte(qframe) == 0, ("qframe busy"));
	pte_store(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A |
	    X86_PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0));
alc (done): On an unrelated note, I don't understand why the invlpg is performed in pmap_quick_remove_page() instead of here. If the processor prefetches neighboring TLB entries to the one being accessed (as some have been reported to do), then the spin lock does not prevent the situation described in the "AMD64 Architecture Programmer's Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special Coherency Considerations".
kib (done): The change is added to the branch as a separate commit.
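A minimal sketch of the reordering alc suggests, which kib notes is handled by a separate commit on the branch (the exact committed form is not shown here): move the invalidation into pmap_quick_enter_page(), immediately after the PTE is written, so a speculatively prefetched stale entry for qframe cannot survive until the mapping is used.

	pte_store(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A |
	    X86_PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0));
	/* Flush any stale (possibly prefetched) TLB entry before use. */
	invlpg(qframe);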
	return (qframe);
}

void
pmap_quick_remove_page(vm_offset_t addr)
{
	if (addr != qframe)
		return;
	pte_store(vtopte(qframe), 0);
+	/*
+	 * pmap_quick_enter_page() doesn't set PG_G, so we can use
+	 * INVLPG there.
+	 */
alc (done): Change "there" to "here". Also, the correctness of a simple invlpg depends on this virtual address being exclusively mapped by pmap_quick_enter_page(). If someplace else had created a mapping at qframe, then that other place might have included PG_G in the mapping, and we would have to use pmap_invlpg() here: "Since qframe is exclusively mapped by pmap_quick_enter_page() and that function doesn't set PG_G, we can use INVLPG here."
+	 */
	invlpg(qframe);
alc (done): pmap_quick_enter_page() doesn't set PG_G, so strictly speaking this case doesn't need to change.
	mtx_unlock_spin(&qframe_mtx);
}
/*
 * Pdp pages from the large map are managed differently from either
 * kernel or user page table pages. They are permanently allocated at
 * initialization time, and their reference count is permanently set to
 * zero. The pml4 entries pointing to those pages are copied into
[... 1,687 lines not shown ...]