Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -391,9 +391,9 @@
     "Count of saved TLB context on switch");
 
 /* pmap_copy_pages() over non-DMAP */
-static struct mtx cpage_lock;
-static vm_offset_t cpage_a;
-static vm_offset_t cpage_b;
+#define MAX_PCPU_MAPPINGS 2
+DPCPU_DEFINE(vm_offset_t, cpages[MAX_PCPU_MAPPINGS]);
+DPCPU_DEFINE(struct sx, cpages_lock);
 
 /*
  * Crashdump maps.
@@ -1060,11 +1060,20 @@
             M_WAITOK | M_ZERO);
         for (i = 0; i < pv_npg; i++)
                 TAILQ_INIT(&pv_table[i].pv_list);
+}
+
+static void
+pmap_init_cpages(void)
+{
+        int i, j;
 
-        mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
-        cpage_a = kva_alloc(PAGE_SIZE);
-        cpage_b = kva_alloc(PAGE_SIZE);
+        CPU_FOREACH(i) {
+                for (j = 0; j < MAX_PCPU_MAPPINGS; j++)
+                        DPCPU_ID_SET(i, cpages[j], kva_alloc(PAGE_SIZE));
+                sx_init(DPCPU_ID_PTR(i, cpages_lock), "cpage");
+        }
 }
+SYSINIT(cpages_init, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_cpages, NULL);
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
     "2MB page mapping counters");
@@ -5033,66 +5042,24 @@
     vm_offset_t b_offset, int xfersize)
 {
         void *a_cp, *b_cp;
-        vm_page_t m_a, m_b;
-        vm_paddr_t p_a, p_b;
-        pt_entry_t *pte;
-        vm_offset_t a_pg_offset, b_pg_offset;
+        vm_page_t pages[2];
+        vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
         int cnt;
-        boolean_t pinned;
+        boolean_t mapped;
 
-        /*
-         * NB: The sequence of updating a page table followed by accesses
-         * to the corresponding pages used in the !DMAP case is subject to
-         * the situation described in the "AMD64 Architecture Programmer's
-         * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
-         * Coherency Considerations". Therefore, issuing the INVLPG right
-         * after modifying the PTE bits is crucial.
-         */
-        pinned = FALSE;
         while (xfersize > 0) {
                 a_pg_offset = a_offset & PAGE_MASK;
-                m_a = ma[a_offset >> PAGE_SHIFT];
-                p_a = m_a->phys_addr;
+                pages[0] = ma[a_offset >> PAGE_SHIFT];
                 b_pg_offset = b_offset & PAGE_MASK;
-                m_b = mb[b_offset >> PAGE_SHIFT];
-                p_b = m_b->phys_addr;
+                pages[1] = mb[b_offset >> PAGE_SHIFT];
                 cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
                 cnt = min(cnt, PAGE_SIZE - b_pg_offset);
-                if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
-                    p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
-                        mtx_lock(&cpage_lock);
-                        sched_pin();
-                        pinned = TRUE;
-                        pte = vtopte(cpage_a);
-                        *pte = p_a | X86_PG_A | X86_PG_V |
-                            pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
-                        invlpg(cpage_a);
-                        a_cp = (char *)cpage_a + a_pg_offset;
-                } else {
-                        a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
-                }
-                if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
-                    p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
-                        if (!pinned) {
-                                mtx_lock(&cpage_lock);
-                                sched_pin();
-                                pinned = TRUE;
-                        }
-                        pte = vtopte(cpage_b);
-                        *pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
-                            X86_PG_V | pmap_cache_bits(kernel_pmap,
-                            m_b->md.pat_mode, 0);
-                        invlpg(cpage_b);
-                        b_cp = (char *)cpage_b + b_pg_offset;
-                } else {
-                        b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
-                }
+                mapped = pmap_get_vaddr(pages, vaddr, 2);
+                a_cp = (char *)vaddr[0] + a_pg_offset;
+                b_cp = (char *)vaddr[1] + b_pg_offset;
                 bcopy(a_cp, b_cp, cnt);
-                if (__predict_false(pinned)) {
-                        sched_unpin();
-                        mtx_unlock(&cpage_lock);
-                        pinned = FALSE;
-                }
+                if (__predict_false(mapped))
+                        pmap_remove_vaddr();
                 a_offset += cnt;
                 b_offset += cnt;
                 xfersize -= cnt;
@@ -6868,6 +6835,62 @@
         *num = idx;
 }
 
+/*
+ * Get the kernel virtual address of a physical page.  If the physical
+ * address is not covered by the DMAP, perform a transient per-CPU mapping.
+ * If the function has to perform any such mapping, TRUE is returned and the
+ * caller must call pmap_remove_vaddr() when done.
+ */
+boolean_t
+pmap_get_vaddr(vm_page_t page[], vm_offset_t vaddr[], int count)
+{
+        vm_paddr_t paddr;
+        vm_offset_t *cpages;
+        boolean_t pinned;
+        int i;
+
+        KASSERT(count <= MAX_PCPU_MAPPINGS,
+            ("pmap_get_vaddr only supports %d simultaneous per-CPU mappings",
+            MAX_PCPU_MAPPINGS));
+
+        /*
+         * NB: The sequence of updating a page table followed by accesses
+         * to the corresponding pages used in the !DMAP case is subject to
+         * the situation described in the "AMD64 Architecture Programmer's
+         * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
+         * Coherency Considerations". Therefore, issuing the INVLPG right
+         * after modifying the PTE bits is crucial.
+         */
+        pinned = FALSE;
+        for (i = 0; i < count; i++) {
+                paddr = VM_PAGE_TO_PHYS(page[i]);
+                if (__predict_false(paddr >= dmaplimit)) {
+                        if (!pinned) {
+                                sched_pin();
+                                sx_xlock(DPCPU_PTR(cpages_lock));
+                                cpages = DPCPU_GET(cpages);
+                                pinned = TRUE;
+                        }
+                        pmap_kenter_attr(cpages[i], paddr, page[i]->md.pat_mode);
+                        invlpg(cpages[i]);
+                        vaddr[i] = cpages[i];
+                } else {
+                        vaddr[i] = PHYS_TO_DMAP(paddr);
+                }
+        }
+
+        return (pinned);
+}
+
+void
+pmap_remove_vaddr(void)
+{
+
+        sx_assert(DPCPU_PTR(cpages_lock), SA_XLOCKED);
+        sx_xunlock(DPCPU_PTR(cpages_lock));
+        sched_unpin();
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
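
For reference, callers of the new interface are expected to follow the pattern
below.  This is a minimal sketch, assuming the prototypes from the pmap.h hunk
further down; copy_from_page() is a hypothetical helper and not part of the
patch.

/*
 * Illustration only: copy one page into a kernel buffer.  pmap_get_vaddr()
 * hides whether the page is covered by the DMAP; it returns TRUE only when a
 * transient per-CPU mapping had to be set up, in which case
 * pmap_remove_vaddr() must be called once the access is finished.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

static void
copy_from_page(vm_page_t m, void *dst)
{
        vm_offset_t va;
        boolean_t mapped;

        mapped = pmap_get_vaddr(&m, &va, 1);
        bcopy((void *)va, dst, PAGE_SIZE);
        if (__predict_false(mapped))
                pmap_remove_vaddr();
}
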
Index: sys/amd64/amd64/uio_machdep.c
===================================================================
--- sys/amd64/amd64/uio_machdep.c
+++ sys/amd64/amd64/uio_machdep.c
@@ -61,10 +61,11 @@
         struct thread *td = curthread;
         struct iovec *iov;
         void *cp;
-        vm_offset_t page_offset;
+        vm_offset_t page_offset, vaddr;
         size_t cnt;
         int error = 0;
         int save = 0;
+        boolean_t mapped = FALSE;
 
         KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
             ("uiomove_fromphys: mode"));
@@ -84,8 +85,8 @@
                         cnt = n;
                 page_offset = offset & PAGE_MASK;
                 cnt = min(cnt, PAGE_SIZE - page_offset);
-                cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
-                    page_offset;
+                mapped = pmap_get_vaddr(&ma[offset >> PAGE_SHIFT], &vaddr, 1);
+                cp = (char *)vaddr + page_offset;
                 switch (uio->uio_segflg) {
                 case UIO_USERSPACE:
                         maybe_yield();
@@ -105,6 +106,10 @@
                 case UIO_NOCOPY:
                         break;
                 }
+                if (__predict_false(mapped)) {
+                        pmap_remove_vaddr();
+                        mapped = FALSE;
+                }
                 iov->iov_base = (char *)iov->iov_base + cnt;
                 iov->iov_len -= cnt;
                 uio->uio_resid -= cnt;
@@ -113,6 +118,8 @@
                 n -= cnt;
         }
 out:
+        if (__predict_false(mapped))
+                pmap_remove_vaddr();
         if (save == 0)
                 td->td_pflags &= ~TDP_DEADLKTREAT;
         return (error);
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -396,6 +396,8 @@
 void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
 void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
 void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_get_vaddr(vm_page_t *pages, vm_offset_t *vaddr, int count);
+void pmap_remove_vaddr(void);
 #endif /* _KERNEL */
 
 #endif /* !LOCORE */
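
The per-CPU state in pmap.c is built on the DPCPU machinery from <sys/pcpu.h>.
The following reduced sketch shows the same declare/initialize/consume pattern
in isolation; NSLOTS, slots, slots_lock, slots_init() and slots_use() are
made-up names for this illustration and not part of the patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>

#define NSLOTS 2

/* One array of slots and one lock per CPU, like cpages/cpages_lock above. */
DPCPU_DEFINE(int, slots[NSLOTS]);
DPCPU_DEFINE(struct sx, slots_lock);

static void
slots_init(void *arg __unused)
{
        int cpu, i;

        /* Set up every CPU's private copy, mirroring pmap_init_cpages(). */
        CPU_FOREACH(cpu) {
                for (i = 0; i < NSLOTS; i++)
                        DPCPU_ID_SET(cpu, slots[i], 0);
                sx_init(DPCPU_ID_PTR(cpu, slots_lock), "slots");
        }
}
SYSINIT(slots, SI_SUB_CPU, SI_ORDER_ANY, slots_init, NULL);

static void
slots_use(void)
{
        int *slots;

        sched_pin();                            /* stay on this CPU */
        sx_xlock(DPCPU_PTR(slots_lock));        /* serialize this CPU's users */
        slots = DPCPU_GET(slots);               /* this CPU's copy of the array */
        slots[0]++;
        sx_xunlock(DPCPU_PTR(slots_lock));
        sched_unpin();
}

sched_pin() comes first so the thread cannot migrate between looking up its
CPU's copy and dropping the lock, which is the same ordering pmap_get_vaddr()
uses.
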
("physical address %#jx not covered by the DMAP", \ + (uintmax_t)x)); \ + (x) | DMAP_MIN_ADDRESS; }) + +#define DMAP_TO_PHYS(x) ({ \ + KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) && \ + (x) >= DMAP_MIN_ADDRESS, \ + ("virtual address %#jx not covered by the DMAP", \ + (uintmax_t)x)); \ + (x) & ~DMAP_MIN_ADDRESS; }) /* * How many physical pages per kmem arena virtual page.