Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -115,6 +115,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -402,11 +403,6 @@
 	CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
 	"Count of saved TLB context on switch");
 
-/* pmap_copy_pages() over non-DMAP */
-static struct mtx cpage_lock;
-static vm_offset_t cpage_a;
-static vm_offset_t cpage_b;
-
 /*
  * Crashdump maps.
  */
@@ -1072,10 +1068,6 @@
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
-
-	mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
-	cpage_a = kva_alloc(PAGE_SIZE);
-	cpage_b = kva_alloc(PAGE_SIZE);
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -5056,66 +5048,24 @@
     vm_offset_t b_offset, int xfersize)
 {
 	void *a_cp, *b_cp;
-	vm_page_t m_a, m_b;
-	vm_paddr_t p_a, p_b;
-	pt_entry_t *pte;
-	vm_offset_t a_pg_offset, b_pg_offset;
+	vm_page_t pages[2];
+	vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
 	int cnt;
-	boolean_t pinned;
+	boolean_t mapped;
 
-	/*
-	 * NB: The sequence of updating a page table followed by accesses
-	 * to the corresponding pages used in the !DMAP case is subject to
-	 * the situation described in the "AMD64 Architecture Programmer's
-	 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
-	 * Coherency Considerations".  Therefore, issuing the INVLPG right
-	 * after modifying the PTE bits is crucial.
-	 */
-	pinned = FALSE;
 	while (xfersize > 0) {
 		a_pg_offset = a_offset & PAGE_MASK;
-		m_a = ma[a_offset >> PAGE_SHIFT];
-		p_a = m_a->phys_addr;
+		pages[0] = ma[a_offset >> PAGE_SHIFT];
 		b_pg_offset = b_offset & PAGE_MASK;
-		m_b = mb[b_offset >> PAGE_SHIFT];
-		p_b = m_b->phys_addr;
+		pages[1] = mb[b_offset >> PAGE_SHIFT];
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
-		if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
-		    p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
-			mtx_lock(&cpage_lock);
-			sched_pin();
-			pinned = TRUE;
-			pte = vtopte(cpage_a);
-			*pte = p_a | X86_PG_A | X86_PG_V |
-			    pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
-			invlpg(cpage_a);
-			a_cp = (char *)cpage_a + a_pg_offset;
-		} else {
-			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
-		}
-		if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
-		    p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
-			if (!pinned) {
-				mtx_lock(&cpage_lock);
-				sched_pin();
-				pinned = TRUE;
-			}
-			pte = vtopte(cpage_b);
-			*pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
-			    X86_PG_V | pmap_cache_bits(kernel_pmap,
-			    m_b->md.pat_mode, 0);
-			invlpg(cpage_b);
-			b_cp = (char *)cpage_b + b_pg_offset;
-		} else {
-			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
-		}
+		mapped = pmap_get_vaddr(pages, vaddr, 2, FALSE);
+		a_cp = (char *)vaddr[0] + a_pg_offset;
+		b_cp = (char *)vaddr[1] + b_pg_offset;
 		bcopy(a_cp, b_cp, cnt);
-		if (__predict_false(pinned)) {
-			sched_unpin();
-			mtx_unlock(&cpage_lock);
-			pinned = FALSE;
-		}
+		if (__predict_false(mapped))
+			pmap_remove_vaddr(pages, vaddr, 2, FALSE);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
@@ -6901,6 +6851,97 @@
 	*num = idx;
 }
 
+/*
+ * Get the kernel virtual address of a physical page.  If the physical address
+ * is not covered by the DMAP, perform a transient mapping.  If the function
+ * has to perform any such mappings, TRUE is returned and the caller
+ * must call pmap_remove_vaddr() when done.
+ */
+boolean_t
+pmap_get_vaddr(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+	vm_paddr_t paddr;
+	boolean_t needs_mapping;
+	pt_entry_t *pte;
+	int cache_bits, error, i;
+
+	/*
+	 * Allocate any KVA space that we need; this is done in a separate
+	 * loop to avoid calling vmem_alloc() while pinned.
+	 */
+	needs_mapping = FALSE;
+	for (i = 0; i < count; i++) {
+		paddr = VM_PAGE_TO_PHYS(page[i]);
+		if (__predict_false(paddr >= dmaplimit)) {
+			error = vmem_alloc(kernel_arena, PAGE_SIZE,
+			    M_BESTFIT | M_WAITOK, &vaddr[i]);
+			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+			needs_mapping = TRUE;
+		} else {
+			vaddr[i] = PHYS_TO_DMAP(paddr);
+		}
+	}
+
+	/* Exit early if everything is covered by the DMAP */
+	if (!needs_mapping)
+		goto out;
+
+	/*
+	 * NB: The sequence of updating a page table followed by accesses
+	 * to the corresponding pages used in the !DMAP case is subject to
+	 * the situation described in the "AMD64 Architecture Programmer's
+	 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
+	 * Coherency Considerations".  Therefore, issuing the INVLPG right
+	 * after modifying the PTE bits is crucial.
+	 */
+	if (!can_fault)
+		sched_pin();
+	for (i = 0; i < count; i++) {
+		paddr = VM_PAGE_TO_PHYS(page[i]);
+		if (paddr >= dmaplimit) {
+			if (can_fault) {
+				/*
+				 * Slow path: since we can get page faults
+				 * while mappings are active, don't pin the
+				 * thread to the CPU; instead add a global
+				 * mapping visible to all CPUs.
+				 */
+				pmap_qenter(vaddr[i], &page[i], 1);
+			} else {
+				pte = vtopte(vaddr[i]);
+				cache_bits = pmap_cache_bits(kernel_pmap,
+				    page[i]->md.pat_mode, 0);
+				pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
+				    cache_bits);
+				invlpg(vaddr[i]);
+			}
+		}
+	}
+
+out:
+	return (needs_mapping);
+}
+
+void
+pmap_remove_vaddr(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+	vm_paddr_t paddr;
+	int i;
+
+	if (!can_fault)
+		sched_unpin();
+	for (i = 0; i < count; i++) {
+		paddr = VM_PAGE_TO_PHYS(page[i]);
+		if (paddr >= dmaplimit) {
+			if (can_fault)
+				pmap_qremove(vaddr[i], 1);
+			vmem_free(kernel_arena, vaddr[i], PAGE_SIZE);
+		}
+	}
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include
Index: sys/amd64/amd64/uio_machdep.c
===================================================================
--- sys/amd64/amd64/uio_machdep.c
+++ sys/amd64/amd64/uio_machdep.c
@@ -61,10 +61,11 @@
 	struct thread *td = curthread;
 	struct iovec *iov;
 	void *cp;
-	vm_offset_t page_offset;
+	vm_offset_t page_offset, vaddr;
 	size_t cnt;
 	int error = 0;
 	int save = 0;
+	boolean_t mapped = FALSE;
 
 	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
 	    ("uiomove_fromphys: mode"));
@@ -84,8 +85,9 @@
 			cnt = n;
 		page_offset = offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - page_offset);
-		cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
-		    page_offset;
+		mapped = pmap_get_vaddr(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+		    TRUE);
+		cp = (char *)vaddr + page_offset;
 		switch (uio->uio_segflg) {
 		case UIO_USERSPACE:
 			maybe_yield();
@@ -105,6 +107,11 @@
 		case UIO_NOCOPY:
 			break;
 		}
+		if (__predict_false(mapped)) {
+			pmap_remove_vaddr(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+			    TRUE);
+			mapped = FALSE;
+		}
 		iov->iov_base = (char *)iov->iov_base + cnt;
 		iov->iov_len -= cnt;
 		uio->uio_resid -= cnt;
@@ -113,6 +120,9 @@
 		n -= cnt;
 	}
 out:
+	if (__predict_false(mapped))
+		pmap_remove_vaddr(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+		    TRUE);
 	if (save == 0)
 		td->td_pflags &= ~TDP_DEADLKTREAT;
 	return (error);
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -397,6 +397,8 @@
 void	pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
 	    boolean_t force);
 void	pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_get_vaddr(vm_page_t *, vm_offset_t *, int, boolean_t);
+void	pmap_remove_vaddr(vm_page_t *, vm_offset_t *, int, boolean_t);
 #endif /* _KERNEL */
 
 #endif /* !LOCORE */
Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -175,8 +175,18 @@
 #define	VM_MAX_ADDRESS		UPT_MAX_ADDRESS
 #define	VM_MIN_ADDRESS		(0)
 
-#define	PHYS_TO_DMAP(x)		((x) | DMAP_MIN_ADDRESS)
-#define	DMAP_TO_PHYS(x)		((x) & ~DMAP_MIN_ADDRESS)
+#define	PHYS_TO_DMAP(x)	({						\
+	KASSERT((x) < dmaplimit,					\
+	    ("physical address %#jx not covered by the DMAP",		\
+	    (uintmax_t)x));						\
+	(x) | DMAP_MIN_ADDRESS; })
+
+#define	DMAP_TO_PHYS(x)	({						\
+	KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) &&			\
+	    (x) >= DMAP_MIN_ADDRESS,					\
+	    ("virtual address %#jx not covered by the DMAP",		\
+	    (uintmax_t)x));						\
+	(x) & ~DMAP_MIN_ADDRESS; })
 
 /*
  * How many physical pages per kmem arena virtual page.
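
Usage note (illustration only, not part of the patch): below is a minimal sketch of the calling convention for the new pmap_get_vaddr()/pmap_remove_vaddr() pair. The helper name copy_page_to_kbuf() and its arguments are invented for this sketch; the call pattern mirrors the pmap_copy_pages() and uiomove_fromphys() hunks above. can_fault is FALSE here because the bcopy() between the two calls cannot fault; also note that for non-DMAP pages pmap_get_vaddr() allocates KVA with vmem_alloc(..., M_WAITOK, ...), so callers must be in a context that is allowed to sleep.

/*
 * Hypothetical example only -- not part of the patch.  Copy one physical
 * page into a kernel buffer, using the new helpers to obtain a usable KVA
 * whether or not the page is covered by the DMAP.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

static void
copy_page_to_kbuf(vm_page_t m, void *kbuf)
{
	vm_offset_t va;
	boolean_t mapped;

	/*
	 * can_fault == FALSE: the access below cannot fault, so for a page
	 * outside the DMAP pmap_get_vaddr() pins the thread and installs a
	 * CPU-local mapping (PTE store followed by INVLPG).  It returns
	 * TRUE only when such a transient mapping had to be created.
	 */
	mapped = pmap_get_vaddr(&m, &va, 1, FALSE);
	bcopy((void *)va, kbuf, PAGE_SIZE);
	if (__predict_false(mapped))
		pmap_remove_vaddr(&m, &va, 1, FALSE);
}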