diff --git a/sys/compat/linuxkpi/common/include/asm/set_memory.h b/sys/compat/linuxkpi/common/include/asm/set_memory.h
--- a/sys/compat/linuxkpi/common/include/asm/set_memory.h
+++ b/sys/compat/linuxkpi/common/include/asm/set_memory.h
@@ -67,19 +67,30 @@
 {
 	KASSERT(numpages == 1, ("%s: numpages %d", __func__, numpages));
 
+#ifdef PAGE_IS_LKPI_PAGE
+	pmap_page_set_memattr(page->vm_page, VM_MEMATTR_UNCACHEABLE);
+#else
 	pmap_page_set_memattr(page, VM_MEMATTR_UNCACHEABLE);
+#endif
 	return (0);
 }
 
 static inline int
 set_pages_wc(struct page *page, int numpages)
 {
+	vm_page_t vmp;
+
 	KASSERT(numpages == 1, ("%s: numpages %d", __func__, numpages));
 
+#ifdef PAGE_IS_LKPI_PAGE
+	vmp = page->vm_page;
+#else
+	vmp = page;
+#endif
 #ifdef VM_MEMATTR_WRITE_COMBINING
-	pmap_page_set_memattr(page, VM_MEMATTR_WRITE_COMBINING);
+	pmap_page_set_memattr(vmp, VM_MEMATTR_WRITE_COMBINING);
 #else
 	return (set_pages_uc(page, numpages));
 #endif
 	return (0);
 }
@@ -89,7 +100,11 @@
 {
 	KASSERT(numpages == 1, ("%s: numpages %d", __func__, numpages));
 
+#ifdef PAGE_IS_LKPI_PAGE
+	pmap_page_set_memattr(page->vm_page, VM_MEMATTR_WRITE_BACK);
+#else
 	pmap_page_set_memattr(page, VM_MEMATTR_WRITE_BACK);
+#endif
 	return (0);
 }
 
diff --git a/sys/compat/linuxkpi/common/include/linux/highmem.h b/sys/compat/linuxkpi/common/include/linux/highmem.h
--- a/sys/compat/linuxkpi/common/include/linux/highmem.h
+++ b/sys/compat/linuxkpi/common/include/linux/highmem.h
@@ -64,7 +64,11 @@
 		return ((void *)PHYS_TO_DMAP(page_to_phys(page)));
 	} else {
 		sched_pin();
+#ifdef PAGE_IS_LKPI_PAGE
+		sf = sf_buf_alloc(page->vm_page, SFB_NOWAIT | SFB_CPUPRIVATE);
+#else
 		sf = sf_buf_alloc(page, SFB_NOWAIT | SFB_CPUPRIVATE);
+#endif
 		if (sf == NULL) {
 			sched_unpin();
 			return (NULL);
@@ -79,8 +83,13 @@
 	vm_memattr_t attr = pgprot2cachemode(prot);
 
 	if (attr != VM_MEMATTR_DEFAULT) {
+#ifdef PAGE_IS_LKPI_PAGE
+		page->vm_page->flags |= PG_FICTITIOUS;
+		pmap_page_set_memattr(page->vm_page, attr);
+#else
 		page->flags |= PG_FICTITIOUS;
 		pmap_page_set_memattr(page, attr);
+#endif
 	}
 	return (kmap(page));
 }
@@ -112,7 +121,11 @@
 
 	if (!PMAP_HAS_DMAP) {
 		/* lookup SF buffer in list */
+#ifdef PAGE_IS_LKPI_PAGE
+		sf = sf_buf_alloc(page->vm_page, SFB_NOWAIT | SFB_CPUPRIVATE);
+#else
 		sf = sf_buf_alloc(page, SFB_NOWAIT | SFB_CPUPRIVATE);
+#endif
 
 		/* double-free */
 		sf_buf_free(sf);
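The same "#ifdef PAGE_IS_LKPI_PAGE" unwrap recurs at every call site above and below. A small inline helper could centralize it so most call sites stay unconditional; the following is a hypothetical sketch, not part of this patch, assuming it would live next to the struct page definition in linux/page.h:

/*
 * Hypothetical helper, not in this patch: resolve a LinuxKPI page to its
 * native vm_page in one place, so call sites need no #ifdef of their own.
 */
static inline struct vm_page *
lkpi_page_to_vm_page(struct page *page)
{
#ifdef PAGE_IS_LKPI_PAGE
	return (page->vm_page);
#else
	return (page);
#endif
}

With that in place, e.g. set_pages_uc() would collapse to a single unconditional
pmap_page_set_memattr(lkpi_page_to_vm_page(page), VM_MEMATTR_UNCACHEABLE) call.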
diff --git a/sys/compat/linuxkpi/common/include/linux/mm.h b/sys/compat/linuxkpi/common/include/linux/mm.h
--- a/sys/compat/linuxkpi/common/include/linux/mm.h
+++ b/sys/compat/linuxkpi/common/include/linux/mm.h
@@ -268,19 +268,31 @@
 static inline void
 set_page_dirty(struct page *page)
 {
+#ifdef PAGE_IS_LKPI_PAGE
+	vm_page_dirty(page->vm_page);
+#else
 	vm_page_dirty(page);
+#endif
 }
 
 static inline void
 mark_page_accessed(struct page *page)
 {
+#ifdef PAGE_IS_LKPI_PAGE
+	vm_page_reference(page->vm_page);
+#else
 	vm_page_reference(page);
+#endif
 }
 
 static inline void
 get_page(struct page *page)
 {
+#ifdef PAGE_IS_LKPI_PAGE
+	vm_page_wire(page->vm_page);
+#else
 	vm_page_wire(page);
+#endif
 }
 
 static inline void
@@ -400,20 +412,32 @@
 {
 	vm_paddr_t paddr;
 
 	paddr = pmap_kextract((vm_offset_t)addr);
+#ifdef PAGE_IS_LKPI_PAGE
+	panic("XXX-BZ TODO: %jx", (uintmax_t)paddr);
+#else
 	return (PHYS_TO_VM_PAGE(paddr));
+#endif
 }
 
 static inline int
 trylock_page(struct page *page)
 {
+#ifdef PAGE_IS_LKPI_PAGE
+	return (vm_page_tryxbusy(page->vm_page));
+#else
 	return (vm_page_tryxbusy(page));
+#endif
 }
 
 static inline void
 unlock_page(struct page *page)
 {
+#ifdef PAGE_IS_LKPI_PAGE
+	vm_page_xunbusy(page->vm_page);
+#else
 	vm_page_xunbusy(page);
+#endif
 }
 
 extern int is_vmalloc_addr(const void *addr);
diff --git a/sys/compat/linuxkpi/common/include/linux/page.h b/sys/compat/linuxkpi/common/include/linux/page.h
--- a/sys/compat/linuxkpi/common/include/linux/page.h
+++ b/sys/compat/linuxkpi/common/include/linux/page.h
@@ -48,7 +48,20 @@
 typedef unsigned long linux_pgd_t;
 typedef unsigned long pgprot_t;
 
+#ifdef __not_yet__
+#define	PAGE_IS_LKPI_PAGE
+#endif
+#ifdef PAGE_IS_LKPI_PAGE
+struct page {
+	/* Native FreeBSD vm_page. */
+	struct vm_page	*vm_page;
+
+	/* Linux fields. */
+	struct page_pool *pp;
+};
+#else
 #define	page	vm_page
+#endif
 
 #define	LINUXKPI_PROT_VALID	(1 << 3)
 #define	LINUXKPI_CACHE_MODE_SHIFT 4
@@ -72,14 +85,30 @@
 	return (VM_MEMATTR_DEFAULT);
 }
 
-#define	page_to_virt(page)	linux_page_address(page)
+#ifdef PAGE_IS_LKPI_PAGE
+struct page *lkpi_vm_page_to_page(struct vm_page *);	/* Internal. */
+struct page *linuxkpi_virt_to_page(const void *);
+struct page *linuxkpi_pfn_to_page(unsigned long);
+
+#define	virt_to_page(_v)	linuxkpi_virt_to_page(_v)
+#define	page_to_pfn(_page)	(VM_PAGE_TO_PHYS((_page)->vm_page) >> PAGE_SHIFT)
+#define	pfn_to_page(_pfn)	linuxkpi_pfn_to_page(_pfn)
+#define	page_to_phys(_page)	VM_PAGE_TO_PHYS((_page)->vm_page)
+#else
 #define	virt_to_page(x)		PHYS_TO_VM_PAGE(vtophys(x))
 #define	page_to_pfn(pp)		(VM_PAGE_TO_PHYS(pp) >> PAGE_SHIFT)
 #define	pfn_to_page(pfn)	(PHYS_TO_VM_PAGE((pfn) << PAGE_SHIFT))
-#define	nth_page(page,n)	pfn_to_page(page_to_pfn(page) + (n))
 #define	page_to_phys(page)	VM_PAGE_TO_PHYS(page)
+#endif
+#define	page_to_virt(page)	linux_page_address(page)
+#define	nth_page(page,n)	pfn_to_page(page_to_pfn(page) + (n))
+
+/*
+ * clear_page() is also called on kmem-allocated pages.
+ * [2023-07-31] mthca(4) is the only in-tree consumer.
+ */
+#define	clear_page(page)	memset(page, 0, PAGE_SIZE)
 
-#define	clear_page(page)	memset(page, 0, PAGE_SIZE)
 #define	pgprot_noncached(prot)	\
     (((prot) & VM_PROT_ALL) | cachemode2protval(VM_MEMATTR_UNCACHEABLE))
 #ifdef VM_MEMATTR_WRITE_COMBINING
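Under PAGE_IS_LKPI_PAGE a struct page and its vm_page become distinct objects related through the shadow array in linux_page.c, and both macro sets are expected to keep the pfn/phys round trips intact. A minimal sketch of that invariant, kernel-style, assuming lkpi_page_init() (see linux_page.c below) has populated the shadow entries:

/*
 * Sketch of the invariants both macro sets must preserve; "m" is any page
 * taken from the vm_page array.  Not part of the patch.
 */
static void
lkpi_page_macros_selftest(struct vm_page *m)
{
	struct page *page;

#ifdef PAGE_IS_LKPI_PAGE
	page = lkpi_vm_page_to_page(m);
	KASSERT(page->vm_page == m, ("shadow entry out of sync"));
#else
	page = m;
#endif
	KASSERT(page_to_phys(page) == VM_PAGE_TO_PHYS(m),
	    ("page_to_phys mismatch"));
	KASSERT(pfn_to_page(page_to_pfn(page)) == page,
	    ("pfn round trip broken"));
}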
diff --git a/sys/compat/linuxkpi/common/include/linux/scatterlist.h b/sys/compat/linuxkpi/common/include/linux/scatterlist.h
--- a/sys/compat/linuxkpi/common/include/linux/scatterlist.h
+++ b/sys/compat/linuxkpi/common/include/linux/scatterlist.h
@@ -596,7 +596,11 @@
 		len = min(piter.sg->length - skip, buflen);
 
 		page = sg_page_iter_page(&piter);
+#ifdef PAGE_IS_LKPI_PAGE
+		sf = sf_buf_alloc(page->vm_page, SFB_CPUPRIVATE | SFB_NOWAIT);
+#else
 		sf = sf_buf_alloc(page, SFB_CPUPRIVATE | SFB_NOWAIT);
+#endif
 		if (sf == NULL)
 			break;
 		p = (char *)sf_buf_kva(sf) + piter.sg_pgoffset + skip;
@@ -651,7 +655,11 @@
 
 		page = sg_page_iter_page(&iter);
 		if (!PMAP_HAS_DMAP) {
+#ifdef PAGE_IS_LKPI_PAGE
+			sf = sf_buf_alloc(page->vm_page, SFB_CPUPRIVATE | SFB_NOWAIT);
+#else
 			sf = sf_buf_alloc(page, SFB_CPUPRIVATE | SFB_NOWAIT);
+#endif
 			if (sf == NULL)
 				break;
 			vaddr = (char *)sf_buf_kva(sf);
diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c
--- a/sys/compat/linuxkpi/common/src/linux_compat.c
+++ b/sys/compat/linuxkpi/common/src/linux_compat.c
@@ -1893,15 +1893,33 @@
 void *
 vmap(struct page **pages, unsigned int count, unsigned long flags, int prot)
 {
+	struct vm_page **mpp;
 	vm_offset_t off;
 	size_t size;
+	int i;
+
+	mpp = malloc(sizeof(*mpp) * count, M_KMALLOC, M_NOWAIT | M_ZERO);
+	if (mpp == NULL)
+		return (NULL);
 
 	size = count * PAGE_SIZE;
 	off = kva_alloc(size);
-	if (off == 0)
+	if (off == 0) {
+		free(mpp, M_KMALLOC);
 		return (NULL);
+	}
+
+	for (i = 0; i < count; i++)
+#ifdef PAGE_IS_LKPI_PAGE
+		mpp[i] = pages[i]->vm_page;
+#else
+		mpp[i] = pages[i];
+#endif
+
 	vmmap_add((void *)off, size);
-	pmap_qenter(off, pages, count);
+	pmap_qenter(off, mpp, count);
+
+	free(mpp, M_KMALLOC);
 
 	return ((void *)off);
 }
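vmap() above and the get_user_pages() paths below all build the same throwaway vm_page array by hand. A shared helper could keep the allocation and the unwrap loop in one place; hypothetical sketch, not in the patch, built from the patch's own loop:

/*
 * Hypothetical helper, not in this patch: unwrap an array of struct page
 * into a temporary vm_page array for native interfaces such as
 * pmap_qenter().  The caller frees the result with free(, M_KMALLOC).
 */
static struct vm_page **
lkpi_pages_to_vm_pages(struct page **pages, unsigned int count, int mflags)
{
	struct vm_page **mpp;
	unsigned int i;

	mpp = malloc(sizeof(*mpp) * count, M_KMALLOC, mflags);
	if (mpp == NULL)
		return (NULL);
	for (i = 0; i < count; i++) {
#ifdef PAGE_IS_LKPI_PAGE
		mpp[i] = pages[i]->vm_page;
#else
		mpp[i] = pages[i];
#endif
	}
	return (mpp);
}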
diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c
--- a/sys/compat/linuxkpi/common/src/linux_page.c
+++ b/sys/compat/linuxkpi/common/src/linux_page.c
@@ -85,14 +85,21 @@
 void *
 linux_page_address(const struct page *page)
 {
+	const struct vm_page *m;
 
-	if (page->object != kernel_object) {
+#ifdef PAGE_IS_LKPI_PAGE
+	m = page->vm_page;
+#else
+	m = page;
+#endif
+
+	if (m->object != kernel_object) {
 		return (PMAP_HAS_DMAP ?
 		    ((void *)(uintptr_t)PHYS_TO_DMAP(page_to_phys(page))) :
 		    NULL);
 	}
 	return ((void *)(uintptr_t)(VM_MIN_KERNEL_ADDRESS +
-	    IDX_TO_OFF(page->pindex)));
+	    IDX_TO_OFF(m->pindex)));
 }
 
 struct page *
@@ -101,6 +108,7 @@
 	struct page *page;
 
 	if (PMAP_HAS_DMAP) {
+		struct vm_page *m;
 		unsigned long npages = 1UL << order;
 		int req = VM_ALLOC_WIRED;
 
@@ -108,8 +116,8 @@
 			req |= VM_ALLOC_ZERO;
 
 		if (order == 0 && (flags & GFP_DMA32) == 0) {
-			page = vm_page_alloc_noobj(req);
-			if (page == NULL)
+			m = vm_page_alloc_noobj(req);
+			if (m == NULL)
 				return (NULL);
 		} else {
 			vm_paddr_t pmax = (flags & GFP_DMA32) ?
@@ -119,11 +127,11 @@
 			req |= VM_ALLOC_NORECLAIM;
 
 retry:
-			page = vm_page_alloc_noobj_contig(req, npages, 0, pmax,
+			m = vm_page_alloc_noobj_contig(req, npages, 0, pmax,
 			    PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
-			if (page == NULL) {
+			if (m == NULL) {
 				if ((flags & (M_WAITOK | __GFP_NORETRY)) ==
 				    M_WAITOK) {
 					int err = vm_page_reclaim_contig(req,
 					    npages, 0, pmax, PAGE_SIZE, 0);
 					if (err == ENOMEM)
@@ -136,6 +144,11 @@
 				return (NULL);
 			}
 		}
+#ifdef PAGE_IS_LKPI_PAGE
+		page = lkpi_vm_page_to_page(m);
+#else
+		page = m;
+#endif
 	} else {
 		vm_offset_t vaddr;
 
@@ -164,11 +177,17 @@
 linux_free_pages(struct page *page, unsigned int order)
 {
 	if (PMAP_HAS_DMAP) {
+		struct vm_page *m;
 		unsigned long npages = 1UL << order;
 		unsigned long x;
 
+#ifdef PAGE_IS_LKPI_PAGE
+		m = page->vm_page;
+#else
+		m = page;
+#endif
 		for (x = 0; x != npages; x++) {
-			vm_page_t pgo = page + x;
+			vm_page_t pgo = m + x;
 
 			/*
 			 * The "free page" function is used in several
@@ -231,9 +250,14 @@
 	if (addr >= VM_MIN_KERNEL_ADDRESS && addr < VM_MAX_KERNEL_ADDRESS) {
 		_linux_free_kmem(addr, order);
 	} else {
-		vm_page_t page;
+		struct vm_page *m;
+		struct page *page;
 
-		page = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(addr));
+		m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(addr));
+#ifdef PAGE_IS_LKPI_PAGE
+		page = lkpi_vm_page_to_page(m);
+#else
+		page = m;
+#endif
 		linux_free_pages(page, order);
 	}
 }
@@ -242,13 +266,25 @@
 linux_get_user_pages_internal(vm_map_t map, unsigned long start, int nr_pages,
     int write, struct page **pages)
 {
+	struct vm_page **mpp;
 	vm_prot_t prot;
 	size_t len;
-	int count;
+	int count, i;
+
+	mpp = malloc(sizeof(*mpp) * nr_pages, M_KMALLOC, M_NOWAIT | M_ZERO);
+	if (mpp == NULL)
+		return (-ENOMEM);
 
 	prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
 	len = ptoa((vm_offset_t)nr_pages);
-	count = vm_fault_quick_hold_pages(map, start, len, prot, pages, nr_pages);
+	count = vm_fault_quick_hold_pages(map, start, len, prot, mpp, nr_pages);
+	for (i = 0; i < count; i++)
+#ifdef PAGE_IS_LKPI_PAGE
+		pages[i] = lkpi_vm_page_to_page(mpp[i]);
+#else
+		pages[i] = mpp[i];
+#endif
+	free(mpp, M_KMALLOC);
 	return (count == -1 ? -EFAULT : nr_pages);
 }
 
@@ -256,12 +292,13 @@
 __get_user_pages_fast(unsigned long start, int nr_pages, int write,
     struct page **pages)
 {
+	struct vm_page **mpp;
 	vm_map_t map;
 	vm_page_t *mp;
 	vm_offset_t va;
 	vm_offset_t end;
 	vm_prot_t prot;
-	int count;
+	int count, i;
 
 	if (nr_pages == 0 || in_interrupt())
 		return (0);
@@ -272,7 +309,12 @@
 	if (!vm_map_range_valid(map, start, end))
 		return (-EINVAL);
 	prot = write ? (VM_PROT_READ | VM_PROT_WRITE) : VM_PROT_READ;
-	for (count = 0, mp = pages, va = start; va < end;
+
+	mpp = malloc(sizeof(*mpp) * nr_pages, M_KMALLOC, M_NOWAIT | M_ZERO);
+	if (mpp == NULL)
+		return (-ENOMEM);
+
+	for (count = 0, mp = mpp, va = start; va < end;
 	    mp++, va += PAGE_SIZE, count++) {
 		*mp = pmap_extract_and_hold(map->pmap, va, prot);
 		if (*mp == NULL)
@@ -292,6 +334,13 @@
 			vm_page_dirty(*mp);
 		}
 	}
+	for (i = 0; i < count; i++)
+#ifdef PAGE_IS_LKPI_PAGE
+		pages[i] = lkpi_vm_page_to_page(mpp[i]);
+#else
+		pages[i] = mpp[i];
+#endif
+	free(mpp, M_KMALLOC);
 
 	return (count);
 }
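In both user-page paths above, pages[] is purely an output array: the native VM fills vm_page pointers into the temporary array, and only the held entries are then re-expressed as struct page. A minimal single-page sketch of that direction, with an illustrative function name not found in the patch and error unwinding elided:

/*
 * Sketch only: hold one user page at "uva" and hand it back as a
 * LinuxKPI struct page.  "example_hold_user_page" is hypothetical.
 */
static int
example_hold_user_page(vm_map_t map, vm_offset_t uva, struct page **pp)
{
	struct vm_page *m;

	if (vm_fault_quick_hold_pages(map, uva, PAGE_SIZE, VM_PROT_READ,
	    &m, 1) == -1)
		return (-EFAULT);
#ifdef PAGE_IS_LKPI_PAGE
	*pp = lkpi_vm_page_to_page(m);
#else
	*pp = m;
#endif
	return (0);
}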
@@ -539,7 +588,7 @@
 linuxkpi_page_frag_alloc(struct page_frag_cache *pfc, size_t fragsz,
     gfp_t gfp)
 {
-	vm_page_t pages;
+	struct page *pages;
 
 	if (fragsz == 0)
 		return (NULL);
@@ -561,7 +610,7 @@
 void
 linuxkpi_page_frag_free(void *addr)
 {
-	vm_page_t page;
+	struct page *page;
 
 	page = virt_to_page(addr);
 	linux_free_pages(page, 0);
@@ -573,3 +622,65 @@
 
 	linux_free_pages(page, 0);
 }
+
+#ifdef PAGE_IS_LKPI_PAGE
+/* -------------------------------------------------------------------------- */
+/*
+ * Mimic vm_page_array[] and the logic depending on PMAP_HAS_PAGE_ARRAY and
+ * VM_PHYSSEG_SPARSE/VM_PHYSSEG_DENSE for struct page, essentially creating a
+ * shadow structure that keeps LinuxKPI-specific state alongside native pages.
+ */
+
+static MALLOC_DEFINE(M_LKPI_PAGE, "lkpipage", "LinuxKPI page array");
+static struct page *lkpi_page_array;
+
+struct page *
+lkpi_vm_page_to_page(struct vm_page *m)
+{
+	struct vm_phys_seg *seg;
+	vm_paddr_t pa;
+
+	pa = VM_PAGE_TO_PHYS(m);
+	seg = &vm_phys_segs[m->segind];
+	return (&lkpi_page_array[seg->first_page - vm_page_array +
+	    atop(pa - seg->start)]);
+}
+
+struct page *
+linuxkpi_virt_to_page(const void *v)
+{
+	struct vm_page *m;
+
+	m = PHYS_TO_VM_PAGE(vtophys(v));
+	return (lkpi_vm_page_to_page(m));
+}
+
+/* pfn (page-frame number) to page. */
+struct page *
+linuxkpi_pfn_to_page(unsigned long pfn)
+{
+	struct vm_page *m;
+
+	m = PHYS_TO_VM_PAGE((vm_paddr_t)pfn << PAGE_SHIFT);
+	return (lkpi_vm_page_to_page(m));
+}
+
+static void
+lkpi_page_init(void *arg)
+{
+	unsigned long i;
+
+	lkpi_page_array = malloc(vm_page_array_size * sizeof(struct page),
+	    M_LKPI_PAGE, M_WAITOK | M_ZERO);
+	for (i = 0; i < vm_page_array_size; i++)
+		lkpi_page_array[i].vm_page = &vm_page_array[i];
+}
+SYSINIT(lkpi_page_init, SI_SUB_VM_CONF + 1, SI_ORDER_ANY, lkpi_page_init, NULL);
+
+static void
+lkpi_page_uninit(void *arg)
+{
+	free(lkpi_page_array, M_LKPI_PAGE);
+}
+SYSUNINIT(lkpi_page_uninit, SI_SUB_VM_CONF + 1, SI_ORDER_ANY, lkpi_page_uninit, NULL);
+#endif
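The shadow index above must include the segment's base offset within vm_page_array (seg->first_page - vm_page_array); with atop(pa - seg->start) alone, the first page of every physical segment would collide on lkpi_page_array[0]. A standalone illustration of that arithmetic, in plain userland C with invented segment sizes:

/*
 * Userland demonstration of the shadow-index arithmetic; the segment
 * layout and sizes are illustrative only.
 */
#include <assert.h>
#include <stddef.h>

struct demo_seg {
	size_t first_index;	/* seg->first_page - vm_page_array */
	size_t npages;
};

static size_t
shadow_index(const struct demo_seg *seg, size_t page_in_seg)
{
	return (seg->first_index + page_in_seg);
}

int
main(void)
{
	struct demo_seg segs[2] = { { 0, 1024 }, { 1024, 512 } };

	assert(shadow_index(&segs[0], 0) == 0);
	assert(shadow_index(&segs[1], 0) == 1024);	/* distinct slice */
	assert(shadow_index(&segs[1], 511) == 1535);
	return (0);
}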
diff --git a/sys/compat/linuxkpi/common/src/linux_shmemfs.c b/sys/compat/linuxkpi/common/src/linux_shmemfs.c
--- a/sys/compat/linuxkpi/common/src/linux_shmemfs.c
+++ b/sys/compat/linuxkpi/common/src/linux_shmemfs.c
@@ -44,6 +44,7 @@
 struct page *
 linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp)
 {
+	struct vm_page *m;
 	struct page *page;
 	int rv;
 
@@ -51,11 +52,16 @@
 		panic("GFP_NOWAIT is unimplemented");
 
 	VM_OBJECT_WLOCK(obj);
-	rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL |
+	rv = vm_page_grab_valid(&m, obj, pindex, VM_ALLOC_NORMAL |
 	    VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
 	VM_OBJECT_WUNLOCK(obj);
 	if (rv != VM_PAGER_OK)
 		return (ERR_PTR(-EINVAL));
 
+#ifdef PAGE_IS_LKPI_PAGE
+	page = lkpi_vm_page_to_page(m);
+#else
+	page = m;
+#endif
 	return (page);
 }
diff --git a/sys/dev/iser/iser_initiator.c b/sys/dev/iser/iser_initiator.c
--- a/sys/dev/iser/iser_initiator.c
+++ b/sys/dev/iser/iser_initiator.c
@@ -304,6 +304,7 @@
 	int i;
 	size_t len, tlen;
 	int offset;
+	struct page *page;
 
 	tlen = bp->bio_bcount;
 	offset = bp->bio_ma_offset;
@@ -311,7 +312,13 @@
 	for (i = 0; 0 < tlen; i++, tlen -= len) {
 		sg = &data_buf->sgl[i];
 		len = min(PAGE_SIZE - offset, tlen);
+#ifdef PAGE_IS_LKPI_PAGE
+		/* XXX Fix this "abuse" of feeding LinuxKPI a FreeBSD vm_page. */
+		page = lkpi_vm_page_to_page(bp->bio_ma[i]);
+#else
+		page = bp->bio_ma[i];
+#endif
+		sg_set_page(sg, page, len, offset);
 		offset = 0;
 	}
 
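The iser hunk shows the pattern every native consumer that hands vm_page pointers to LinuxKPI interfaces will need. A reusable wrapper in the opposite direction of the unwrap helper suggested earlier could avoid open-coding it per driver; hypothetical sketch, not in the patch:

/*
 * Hypothetical wrapper, not in this patch: wrap a native page (e.g. from
 * bio_ma[]) for LinuxKPI consumers such as sg_set_page().
 */
static inline struct page *
lkpi_native_page_to_page(struct vm_page *m)
{
#ifdef PAGE_IS_LKPI_PAGE
	return (lkpi_vm_page_to_page(m));
#else
	return (m);
#endif
}

With that, the iser loop body would reduce to sg_set_page(sg, lkpi_native_page_to_page(bp->bio_ma[i]), len, offset) with no per-site #ifdef.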