Index: head/sys/riscv/include/cpufunc.h
===================================================================
--- head/sys/riscv/include/cpufunc.h (revision 341692)
+++ head/sys/riscv/include/cpufunc.h (revision 341693)
@@ -1,117 +1,124 @@
/*-
* Copyright (c) 2015-2016 Ruslan Bukin
* All rights reserved.
*
* Portions of this software were developed by SRI International and the
* University of Cambridge Computer Laboratory under DARPA/AFRL contract
* FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Portions of this software were developed by the University of Cambridge
* Computer Laboratory as part of the CTSRD Project, with support from the
* UK Higher Education Innovation Fund (HEIF).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_CPUFUNC_H_
#define _MACHINE_CPUFUNC_H_
static __inline void
breakpoint(void)
{
__asm("ebreak");
}
#ifdef _KERNEL
#include <machine/riscvreg.h>
static __inline register_t
intr_disable(void)
{
uint64_t ret;
__asm __volatile(
"csrrci %0, sstatus, %1"
: "=&r" (ret) : "i" (SSTATUS_SIE)
);
return (ret & (SSTATUS_SIE));
}
static __inline void
intr_restore(register_t s)
{
__asm __volatile(
"csrs sstatus, %0"
:: "r" (s)
);
}
static __inline void
intr_enable(void)
{
__asm __volatile(
"csrsi sstatus, %0"
:: "i" (SSTATUS_SIE)
);
}
/* NB: fence() is defined as a macro in <machine/atomic.h>. */
static __inline void
fence_i(void)
{
__asm __volatile("fence.i" ::: "memory");
}
static __inline void
sfence_vma(void)
{
__asm __volatile("sfence.vma" ::: "memory");
}
static __inline void
sfence_vma_page(uintptr_t addr)
{
__asm __volatile("sfence.vma %0" :: "r" (addr) : "memory");
}
#define rdcycle() csr_read64(cycle)
#define rdtime() csr_read64(time)
#define rdinstret() csr_read64(instret)
#define rdhpmcounter(n) csr_read64(hpmcounter##n)
+static __inline void
+load_satp(uint64_t val)
+{
+
+ __asm __volatile("csrw satp, %0" :: "r"(val));
+}
+
#define cpufunc_nullop() riscv_nullop()
void riscv_nullop(void);
#endif /* _KERNEL */
#endif /* _MACHINE_CPUFUNC_H_ */
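The new load_satp() helper gives C code a way to install a root page table, mirroring the "csrw satp" sequence in locore.S below. As a rough usage sketch (not part of this commit; the function name and caller below are hypothetical), the satp value is the Sv39 mode bits OR'd with the physical page number of the L1 table:

static void
example_install_root_pagetable(uint64_t l1_phys)
{
	uint64_t satp;

	/* PPN of the root (L1) page table plus the Sv39 mode bits. */
	satp = (l1_phys >> PAGE_SHIFT) | SATP_MODE_SV39;

	sfence_vma();		/* order earlier page-table stores */
	load_satp(satp);
	sfence_vma();		/* discard stale translations */
}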
Index: head/sys/riscv/riscv/locore.S
===================================================================
--- head/sys/riscv/riscv/locore.S (revision 341692)
+++ head/sys/riscv/riscv/locore.S (revision 341693)
@@ -1,314 +1,314 @@
/*-
* Copyright (c) 2015-2018 Ruslan Bukin
* All rights reserved.
*
* Portions of this software were developed by SRI International and the
* University of Cambridge Computer Laboratory under DARPA/AFRL contract
* FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Portions of this software were developed by the University of Cambridge
* Computer Laboratory as part of the CTSRD Project, with support from the
* UK Higher Education Innovation Fund (HEIF).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "assym.inc"
#include
#include
#include
#include
#include
#include
.globl kernbase
.set kernbase, KERNBASE
/* Trap entries */
.text
/* Reset vector */
.text
.globl _start
_start:
/* Get the physical address the kernel was loaded at */
la t0, virt_map
ld t1, 0(t0)
sub t1, t1, t0
li t2, KERNBASE
sub s9, t2, t1 /* s9 = physmem base */
mv s10, a0 /* s10 = hart id */
mv s11, a1 /* s11 = dtbp */
/* Direct secondary cores to mpentry */
bnez s10, mpentry
/*
* Page tables
*/
/* Add L1 entry for kernel */
la s1, pagetable_l1
la s2, pagetable_l2 /* Link to next level PN */
srli s2, s2, PAGE_SHIFT
li a5, KERNBASE
srli a5, a5, L1_SHIFT /* >> L1_SHIFT */
andi a5, a5, 0x1ff /* & 0x1ff */
li t4, PTE_V
slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */
or t6, t4, t5
/* Store L1 PTE entry to position */
li a6, PTE_SIZE
mulw a5, a5, a6
add t0, s1, a5
sd t6, (t0)
/* Level 2 superpages (512 x 2MiB) */
la s1, pagetable_l2
srli t4, s9, 21 /* Div physmem base by 2 MiB */
li t2, 512 /* Build 512 entries */
add t3, t4, t2
li t5, 0
2:
li t0, (PTE_KERN | PTE_X)
slli t2, t4, PTE_PPN1_S /* << PTE_PPN1_S */
or t5, t0, t2
sd t5, (s1) /* Store PTE entry to position */
addi s1, s1, PTE_SIZE
addi t4, t4, 1
bltu t4, t3, 2b
/* Create an L1 page for early devmap */
la s1, pagetable_l1
la s2, pagetable_l2_devmap /* Link to next level PN */
srli s2, s2, PAGE_SHIFT
li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE)
srli a5, a5, L1_SHIFT /* >> L1_SHIFT */
andi a5, a5, 0x1ff /* & 0x1ff */
li t4, PTE_V
slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */
or t6, t4, t5
/* Store single level1 PTE entry to position */
li a6, PTE_SIZE
mulw a5, a5, a6
add t0, s1, a5
sd t6, (t0)
/* Create an L2 superpage for the DTB */
la s1, pagetable_l2_devmap
mv s2, s11
srli s2, s2, PAGE_SHIFT
li t0, (PTE_KERN)
slli t2, s2, PTE_PPN0_S /* << PTE_PPN0_S */
or t0, t0, t2
/* Store PTE entry to position */
li a6, PTE_SIZE
li a5, 510
mulw a5, a5, a6
add t1, s1, a5
sd t0, (t1)
/* Page tables END */
/* Setup supervisor trap vector */
la t0, va
sub t0, t0, s9
li t1, KERNBASE
add t0, t0, t1
csrw stvec, t0
/* Set page tables base register */
la s2, pagetable_l1
srli s2, s2, PAGE_SHIFT
li t0, SATP_MODE_SV39
or s2, s2, t0
sfence.vma
- csrw sptbr, s2
+ csrw satp, s2
.align 2
va:
/* Setup supervisor trap vector */
la t0, cpu_exception_handler
csrw stvec, t0
/* Ensure sscratch is zero */
li t0, 0
csrw sscratch, t0
/* Initialize stack pointer */
la s3, initstack_end
mv sp, s3
addi sp, sp, -PCB_SIZE
/* Clear BSS */
la a0, _C_LABEL(__bss_start)
la s1, _C_LABEL(_end)
1:
sd zero, 0(a0)
addi a0, a0, 8
bltu a0, s1, 1b
/* Fill riscv_bootparams */
addi sp, sp, -40
la t0, pagetable_l1
sd t0, 0(sp) /* kern_l1pt */
sd s9, 8(sp) /* kern_phys */
la t0, initstack_end
sd t0, 16(sp) /* kern_stack */
li t0, (VM_MAX_KERNEL_ADDRESS - 2 * L2_SIZE)
sd t0, 24(sp) /* dtbp_virt */
sd s11, 32(sp) /* dtbp_phys */
mv a0, sp
call _C_LABEL(initriscv) /* Off we go */
call _C_LABEL(mi_startup)
.align 4
initstack:
.space (PAGE_SIZE * KSTACK_PAGES)
initstack_end:
ENTRY(sigcode)
mv a0, sp
addi a0, a0, SF_UC
1:
li t0, SYS_sigreturn
ecall
/* sigreturn failed, exit */
li t0, SYS_exit
ecall
j 1b
END(sigcode)
/* This may be copied to the stack, keep it 16-byte aligned */
.align 3
esigcode:
.data
.align 3
.global szsigcode
szsigcode:
.quad esigcode - sigcode
.align 12
pagetable_l1:
.space PAGE_SIZE
pagetable_l2:
.space PAGE_SIZE
pagetable_l2_devmap:
.space PAGE_SIZE
.align 3
virt_map:
.quad virt_map
/* Not in use, but required for linking. */
.align 3
.globl __global_pointer$
__global_pointer$:
.space 8
.globl init_pt_va
init_pt_va:
.quad pagetable_l2 /* XXX: Keep page tables VA */
#ifndef SMP
ENTRY(mpentry)
1:
wfi
j 1b
END(mpentry)
#else
/*
* mpentry(unsigned long)
*
* Called by a core when it is being brought online.
*/
ENTRY(mpentry)
/*
* Calculate the offset to __riscv_boot_ap
* for the current core, cpuid is in a0.
*/
li t1, 4
mulw t1, t1, a0
/* Get the pointer */
la t0, __riscv_boot_ap
add t0, t0, t1
1:
/* Wait for the kernel to be ready */
lw t1, 0(t0)
beqz t1, 1b
/* Setup stack pointer */
la t0, secondary_stacks
li t1, (PAGE_SIZE * KSTACK_PAGES)
mulw t1, t1, s10
add t0, t0, t1
sub t0, t0, s9
li t1, KERNBASE
add sp, t0, t1
/* Setup supervisor trap vector */
la t0, mpva
sub t0, t0, s9
li t1, KERNBASE
add t0, t0, t1
csrw stvec, t0
/* Set page tables base register */
la s2, pagetable_l1
srli s2, s2, PAGE_SHIFT
li t0, SATP_MODE_SV39
or s2, s2, t0
sfence.vma
- csrw sptbr, s2
+ csrw satp, s2
.align 2
mpva:
/* Setup supervisor trap vector */
la t0, cpu_exception_handler
csrw stvec, t0
/* Ensure sscratch is zero */
li t0, 0
csrw sscratch, t0
call init_secondary
END(mpentry)
#endif
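A hedged C model of the early page-table loop above (the "Level 2 superpages (512 x 2MiB)" block in _start): it fills every L2 slot with a 2 MiB kernel superpage starting at the physical load address. Illustrative only; the function name is invented and the constants are assumed to match machine/pte.h (L2_SHIFT=21, PTE_PPN1_S=19):

/* Not in the source: C equivalent of the 512-entry L2 superpage loop. */
static void
example_build_l2_superpages(uint64_t *pagetable_l2, uint64_t phys_base)
{
	uint64_t ppn1;
	int i;

	ppn1 = phys_base >> L2_SHIFT;	/* physical base in 2 MiB units */
	for (i = 0; i < 512; i++, ppn1++)
		pagetable_l2[i] = (PTE_KERN | PTE_X) | (ppn1 << PTE_PPN1_S);
}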
Index: head/sys/riscv/riscv/pmap.c
===================================================================
--- head/sys/riscv/riscv/pmap.c (revision 341692)
+++ head/sys/riscv/riscv/pmap.c (revision 341693)
@@ -1,3423 +1,3423 @@
/*-
* SPDX-License-Identifier: BSD-4-Clause
*
* Copyright (c) 1991 Regents of the University of California.
* All rights reserved.
* Copyright (c) 1994 John S. Dyson
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
* Copyright (c) 2003 Peter Wemm
* All rights reserved.
* Copyright (c) 2005-2010 Alan L. Cox
* All rights reserved.
* Copyright (c) 2014 Andrew Turner
* All rights reserved.
* Copyright (c) 2014 The FreeBSD Foundation
* All rights reserved.
* Copyright (c) 2015-2018 Ruslan Bukin
* All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
* Science Department and William Jolitz of UUNET Technologies Inc.
*
* Portions of this software were developed by Andrew Turner under
* sponsorship from The FreeBSD Foundation.
*
* Portions of this software were developed by SRI International and the
* University of Cambridge Computer Laboratory under DARPA/AFRL contract
* FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Portions of this software were developed by the University of Cambridge
* Computer Laboratory as part of the CTSRD Project, with support from the
* UK Higher Education Innovation Fund (HEIF).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
*/
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
* Safeport Network Services, and Network Associates Laboratories, the
* Security Research Division of Network Associates, Inc. under
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
* CHATS research program.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Manages physical address maps.
*
* Since the information managed by this module is
* also stored by the logical address mapping module,
* this module may throw away valid virtual-to-physical
* mappings at almost any time. However, invalidations
* of virtual-to-physical mappings must be done as
* requested.
*
* In order to cope with hardware architectures which
* make virtual-to-physical map invalidates expensive,
* this module may delay invalidate or reduced protection
* operations until such time as they are actually
* necessary. This module is given full information as
* to which processors are currently using which maps,
* and to when physical maps must be made correct.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define NUPDE (NPDEPG * NPDEPG)
#define NUSERPGTBLS (NUPDE + NPDEPG)
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE extern inline
#endif
#else
#define PMAP_INLINE
#endif
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#else
#define PV_STAT(x) do { } while (0)
#endif
#define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
#define NPV_LIST_LOCKS MAXCPU
#define PHYS_TO_PV_LIST_LOCK(pa) \
(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
struct rwlock **_lockp = (lockp); \
struct rwlock *_new_lock; \
\
_new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
if (_new_lock != *_lockp) { \
if (*_lockp != NULL) \
rw_wunlock(*_lockp); \
*_lockp = _new_lock; \
rw_wlock(*_lockp); \
} \
} while (0)
#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
#define RELEASE_PV_LIST_LOCK(lockp) do { \
struct rwlock **_lockp = (lockp); \
\
if (*_lockp != NULL) { \
rw_wunlock(*_lockp); \
*_lockp = NULL; \
} \
} while (0)
#define VM_PAGE_TO_PV_LIST_LOCK(m) \
PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
/* The list of all the user pmaps */
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1");
struct pmap kernel_pmap_store;
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;
vm_paddr_t dmap_phys_base; /* The start of the dmap region */
vm_paddr_t dmap_phys_max; /* The limit of the dmap region */
vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */
/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS);
static struct rwlock_padalign pvh_global_lock;
/*
* Data for the pv entry allocation mechanism
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
vm_page_t m, struct rwlock **lockp);
static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
struct rwlock **lockp);
static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
/*
* These load the old table data and store the new value.
* They need to be atomic as the System MMU may write to the table at
* the same time as the CPU.
*/
#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define pmap_set(table, mask) atomic_set_64(table, mask)
#define pmap_load_clear(table) atomic_swap_64(table, 0)
#define pmap_load(table) (*table)
/********************/
/* Inline functions */
/********************/
static __inline void
pagecopy(void *s, void *d)
{
memcpy(d, s, PAGE_SIZE);
}
static __inline void
pagezero(void *p)
{
bzero(p, PAGE_SIZE);
}
#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
#define PTE_TO_PHYS(pte) ((pte >> PTE_PPN0_S) * PAGE_SIZE)
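/*
 * Illustrative sketch (not in the original file): how a virtual address
 * breaks down under the index macros above, assuming the usual Sv39
 * values L1_SHIFT=30, L2_SHIFT=21, L3_SHIFT=12 and a 9-bit Ln_ADDR_MASK.
 */
static __inline void
example_va_decompose(vm_offset_t va)
{

	printf("va %#lx -> l1 %lu l2 %lu l3 %lu off %#lx\n", va,
	    pmap_l1_index(va),	/* va[38:30]: 1 GiB slot in the L1 table */
	    pmap_l2_index(va),	/* va[29:21]: 2 MiB slot in the L2 table */
	    pmap_l3_index(va),	/* va[20:12]: 4 KiB slot in the L3 table */
	    va & L3_OFFSET);	/* byte offset within the 4 KiB page */
}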
static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
return (&pmap->pm_l1[pmap_l1_index(va)]);
}
static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
vm_paddr_t phys;
pd_entry_t *l2;
phys = PTE_TO_PHYS(pmap_load(l1));
l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
return (&l2[pmap_l2_index(va)]);
}
static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
if (l1 == NULL)
return (NULL);
if ((pmap_load(l1) & PTE_V) == 0)
return (NULL);
if ((pmap_load(l1) & PTE_RX) != 0)
return (NULL);
return (pmap_l1_to_l2(l1, va));
}
static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
vm_paddr_t phys;
pt_entry_t *l3;
phys = PTE_TO_PHYS(pmap_load(l2));
l3 = (pd_entry_t *)PHYS_TO_DMAP(phys);
return (&l3[pmap_l3_index(va)]);
}
static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l2;
l2 = pmap_l2(pmap, va);
if (l2 == NULL)
return (NULL);
if ((pmap_load(l2) & PTE_V) == 0)
return (NULL);
if ((pmap_load(l2) & PTE_RX) != 0)
return (NULL);
return (pmap_l2_to_l3(l2, va));
}
static __inline int
pmap_is_write(pt_entry_t entry)
{
return (entry & PTE_W);
}
static __inline int
pmap_l3_valid(pt_entry_t l3)
{
return (l3 & PTE_V);
}
static inline int
pmap_page_accessed(pt_entry_t pte)
{
return (pte & PTE_A);
}
/* Checks if the page is dirty. */
static inline int
pmap_page_dirty(pt_entry_t pte)
{
return (pte & PTE_D);
}
static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
pmap->pm_stats.resident_count += count;
}
static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(pmap->pm_stats.resident_count >= count,
("pmap %p resident count underflow %ld %d", pmap,
pmap->pm_stats.resident_count, count));
pmap->pm_stats.resident_count -= count;
}
static void
pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index,
pt_entry_t entry)
{
struct pmap *user_pmap;
pd_entry_t *l1;
/* Distribute new kernel L1 entry to all the user pmaps */
if (pmap != kernel_pmap)
return;
LIST_FOREACH(user_pmap, &allpmaps, pm_list) {
l1 = &user_pmap->pm_l1[l1index];
if (entry)
pmap_load_store(l1, entry);
else
pmap_load_clear(l1);
}
}
static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
u_int *l2_slot)
{
pt_entry_t *l2;
pd_entry_t *l1;
l1 = (pd_entry_t *)l1pt;
*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
/* Check locore has used a table L1 map */
KASSERT((l1[*l1_slot] & PTE_RX) == 0,
("Invalid bootstrap L1 table"));
/* Find the address of the L2 table */
l2 = (pt_entry_t *)init_pt_va;
*l2_slot = pmap_l2_index(va);
return (l2);
}
static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
u_int l1_slot, l2_slot;
pt_entry_t *l2;
u_int ret;
l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
/* Check locore has used L2 superpages */
KASSERT((l2[l2_slot] & PTE_RX) != 0,
("Invalid bootstrap L2 table"));
/* The L2 entry is a superpage */
ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT;
ret += (va & L2_OFFSET);
return (ret);
}
static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
vm_offset_t va;
vm_paddr_t pa;
pd_entry_t *l1;
u_int l1_slot;
pt_entry_t entry;
pn_t pn;
pa = dmap_phys_base = min_pa & ~L1_OFFSET;
va = DMAP_MIN_ADDRESS;
l1 = (pd_entry_t *)kern_l1;
l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
/* superpages */
pn = (pa / PAGE_SIZE);
entry = PTE_KERN;
entry |= (pn << PTE_PPN0_S);
pmap_load_store(&l1[l1_slot], entry);
}
/* Set the upper limit of the DMAP region */
dmap_phys_max = pa;
dmap_max_addr = va;
sfence_vma();
}
static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
vm_offset_t l3pt;
pt_entry_t entry;
pd_entry_t *l2;
vm_paddr_t pa;
u_int l2_slot;
pn_t pn;
KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
l2 = pmap_l2(kernel_pmap, va);
l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
l2_slot = pmap_l2_index(va);
l3pt = l3_start;
for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
pa = pmap_early_vtophys(l1pt, l3pt);
pn = (pa / PAGE_SIZE);
entry = (PTE_V);
entry |= (pn << PTE_PPN0_S);
pmap_load_store(&l2[l2_slot], entry);
l3pt += PAGE_SIZE;
}
/* Clean the L2 page table */
memset((void *)l3_start, 0, l3pt - l3_start);
return (l3pt);
}
/*
* Bootstrap the system enough to run with virtual memory.
*/
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
uint64_t kern_delta;
pt_entry_t *l2;
vm_offset_t va, freemempos;
vm_offset_t dpcpu, msgbufpv;
vm_paddr_t pa, min_pa, max_pa;
int i;
kern_delta = KERNBASE - kernstart;
physmem = 0;
printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
printf("%lx\n", l1pt);
printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
/* Set this early so we can use the pagetable walking functions */
kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
PMAP_LOCK_INIT(kernel_pmap);
/*
* Initialize the global pv list lock.
*/
rw_init(&pvh_global_lock, "pmap pv global");
LIST_INIT(&allpmaps);
/* Assume the address we were loaded to is a valid physical address */
min_pa = max_pa = KERNBASE - kern_delta;
/*
* Find the minimum physical address. physmap is sorted,
* but may contain empty ranges.
*/
for (i = 0; i < (physmap_idx * 2); i += 2) {
if (physmap[i] == physmap[i + 1])
continue;
if (physmap[i] <= min_pa)
min_pa = physmap[i];
if (physmap[i + 1] > max_pa)
max_pa = physmap[i + 1];
}
printf("physmap_idx %lx\n", physmap_idx);
printf("min_pa %lx\n", min_pa);
printf("max_pa %lx\n", max_pa);
/* Create a direct map region early so we can use it for pa -> va */
pmap_bootstrap_dmap(l1pt, min_pa, max_pa);
va = KERNBASE;
pa = KERNBASE - kern_delta;
/*
* Start to initialize phys_avail by copying from physmap
* up to the physical address KERNBASE points at.
*/
map_slot = avail_slot = 0;
for (; map_slot < (physmap_idx * 2); map_slot += 2) {
if (physmap[map_slot] == physmap[map_slot + 1])
continue;
if (physmap[map_slot] <= pa &&
physmap[map_slot + 1] > pa)
break;
phys_avail[avail_slot] = physmap[map_slot];
phys_avail[avail_slot + 1] = physmap[map_slot + 1];
physmem += (phys_avail[avail_slot + 1] -
phys_avail[avail_slot]) >> PAGE_SHIFT;
avail_slot += 2;
}
/* Add the memory before the kernel */
if (physmap[avail_slot] < pa) {
phys_avail[avail_slot] = physmap[map_slot];
phys_avail[avail_slot + 1] = pa;
physmem += (phys_avail[avail_slot + 1] -
phys_avail[avail_slot]) >> PAGE_SHIFT;
avail_slot += 2;
}
used_map_slot = map_slot;
/*
* Read the page table to find out what is already mapped.
* This assumes we have mapped a block of memory from KERNBASE
* using a single L1 entry.
*/
l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
/* Sanity check the index, KERNBASE should be the first VA */
KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
/* Find how many pages we have mapped */
for (; l2_slot < Ln_ENTRIES; l2_slot++) {
if ((l2[l2_slot] & PTE_V) == 0)
break;
/* Check locore used L2 superpages */
KASSERT((l2[l2_slot] & PTE_RX) != 0,
("Invalid bootstrap L2 table"));
va += L2_SIZE;
pa += L2_SIZE;
}
va = roundup2(va, L2_SIZE);
freemempos = KERNBASE + kernlen;
freemempos = roundup2(freemempos, PAGE_SIZE);
/* Create the l3 tables for the early devmap */
freemempos = pmap_bootstrap_l3(l1pt,
VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
sfence_vma();
#define alloc_pages(var, np) \
(var) = freemempos; \
freemempos += (np * PAGE_SIZE); \
memset((char *)(var), 0, ((np) * PAGE_SIZE));
/* Allocate dynamic per-cpu area. */
alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
dpcpu_init((void *)dpcpu, 0);
/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
msgbufp = (void *)msgbufpv;
virtual_avail = roundup2(freemempos, L2_SIZE);
virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
kernel_vm_end = virtual_avail;
pa = pmap_early_vtophys(l1pt, freemempos);
/* Finish initialising physmap */
map_slot = used_map_slot;
for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
map_slot < (physmap_idx * 2); map_slot += 2) {
if (physmap[map_slot] == physmap[map_slot + 1]) {
continue;
}
/* Have we used the current range? */
if (physmap[map_slot + 1] <= pa) {
continue;
}
/* Do we need to split the entry? */
if (physmap[map_slot] < pa) {
phys_avail[avail_slot] = pa;
phys_avail[avail_slot + 1] = physmap[map_slot + 1];
} else {
phys_avail[avail_slot] = physmap[map_slot];
phys_avail[avail_slot + 1] = physmap[map_slot + 1];
}
physmem += (phys_avail[avail_slot + 1] -
phys_avail[avail_slot]) >> PAGE_SHIFT;
avail_slot += 2;
}
phys_avail[avail_slot] = 0;
phys_avail[avail_slot + 1] = 0;
/*
* Maxmem isn't the "maximum memory", it's one larger than the
* highest page of the physical address space. It should be
* called something like "Maxphyspage".
*/
Maxmem = atop(phys_avail[avail_slot - 1]);
}
/*
* Initialize a vm_page's machine-dependent fields.
*/
void
pmap_page_init(vm_page_t m)
{
TAILQ_INIT(&m->md.pv_list);
m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
* system needs to map virtual memory.
*/
void
pmap_init(void)
{
int i;
/*
* Initialize the pv chunk list mutex.
*/
mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
/*
* Initialize the pool of pv list locks.
*/
for (i = 0; i < NPV_LIST_LOCKS; i++)
rw_init(&pv_list_locks[i], "pmap pv list");
}
#ifdef SMP
/*
* For SMP, these functions have to use IPIs for coherence.
*
* In general, the calling thread uses a plain fence to order the
* writes to the page tables before invoking an SBI callback to invoke
* sfence_vma() on remote CPUs.
*
* Since the riscv pmap does not yet have a pm_active field, IPIs are
* sent to all CPUs in the system.
*/
static void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
cpuset_t mask;
sched_pin();
mask = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &mask);
fence();
sbi_remote_sfence_vma(mask.__bits, va, 1);
sfence_vma_page(va);
sched_unpin();
}
static void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
cpuset_t mask;
sched_pin();
mask = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &mask);
fence();
sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1);
/*
* Might consider a loop of sfence_vma_page() for a small
* number of pages in the future.
*/
sfence_vma();
sched_unpin();
}
static void
pmap_invalidate_all(pmap_t pmap)
{
cpuset_t mask;
sched_pin();
mask = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &mask);
fence();
/*
* XXX: The SBI doc doesn't detail how to specify x0 as the
* address to perform a global fence. BBL currently treats
* all sfence_vma requests as global however.
*/
sbi_remote_sfence_vma(mask.__bits, 0, 0);
sfence_vma();
sched_unpin();
}
#else
/*
* Normal, non-SMP, invalidation functions.
* We inline these within pmap.c for speed.
*/
static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
sfence_vma_page(va);
}
static __inline void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
/*
* Might consider a loop of sfence_vma_page() for a small
* number of pages in the future.
*/
sfence_vma();
}
static __inline void
pmap_invalidate_all(pmap_t pmap)
{
sfence_vma();
}
#endif
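/*
 * Usage sketch (illustrative, not part of this change): the pattern used
 * throughout this file is an atomic page-table update followed by the
 * matching invalidation; on SMP the invalidation also asks the remote
 * harts for an sfence.vma through the SBI.
 */
static __inline void
example_update_and_invalidate(pmap_t pmap, pt_entry_t *l3, pt_entry_t newpte,
    vm_offset_t va)
{

	pmap_load_store(l3, newpte);	/* atomic swap of the PTE */
	pmap_invalidate_page(pmap, va);	/* local + remote sfence.vma */
}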
/*
* Routine: pmap_extract
* Function:
* Extract the physical page address associated
* with the given map/virtual_address pair.
*/
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l2p, l2;
pt_entry_t *l3p, l3;
vm_paddr_t pa;
pa = 0;
PMAP_LOCK(pmap);
/*
* Start with the l2 table. We are unable to allocate
* pages in the l1 table.
*/
l2p = pmap_l2(pmap, va);
if (l2p != NULL) {
l2 = pmap_load(l2p);
if ((l2 & PTE_RX) == 0) {
l3p = pmap_l2_to_l3(l2p, va);
if (l3p != NULL) {
l3 = pmap_load(l3p);
pa = PTE_TO_PHYS(l3);
pa |= (va & L3_OFFSET);
}
} else {
/* The L2 entry is a superpage */
pa = (l2 >> PTE_PPN1_S) << L2_SHIFT;
pa |= (va & L2_OFFSET);
}
}
PMAP_UNLOCK(pmap);
return (pa);
}
/*
* Routine: pmap_extract_and_hold
* Function:
* Atomically extract and hold the physical page
* with the given pmap and virtual address pair
* if that mapping permits the given protection.
*/
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pt_entry_t *l3p, l3;
vm_paddr_t phys;
vm_paddr_t pa;
vm_page_t m;
pa = 0;
m = NULL;
PMAP_LOCK(pmap);
retry:
l3p = pmap_l3(pmap, va);
if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) {
phys = PTE_TO_PHYS(l3);
if (vm_page_pa_tryrelock(pmap, phys, &pa))
goto retry;
m = PHYS_TO_VM_PAGE(phys);
vm_page_hold(m);
}
}
PA_UNLOCK_COND(pa);
PMAP_UNLOCK(pmap);
return (m);
}
vm_paddr_t
pmap_kextract(vm_offset_t va)
{
pd_entry_t *l2;
pt_entry_t *l3;
vm_paddr_t pa;
if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
pa = DMAP_TO_PHYS(va);
} else {
l2 = pmap_l2(kernel_pmap, va);
if (l2 == NULL)
panic("pmap_kextract: No l2");
if ((pmap_load(l2) & PTE_RX) != 0) {
/* superpages */
pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT;
pa |= (va & L2_OFFSET);
return (pa);
}
l3 = pmap_l2_to_l3(l2, va);
if (l3 == NULL)
panic("pmap_kextract: No l3...");
pa = PTE_TO_PHYS(pmap_load(l3));
pa |= (va & PAGE_MASK);
}
return (pa);
}
/***************************************************
* Low level mapping routines.....
***************************************************/
void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{
pt_entry_t entry;
pt_entry_t *l3;
vm_offset_t va;
pn_t pn;
KASSERT((pa & L3_OFFSET) == 0,
("pmap_kenter_device: Invalid physical address"));
KASSERT((sva & L3_OFFSET) == 0,
("pmap_kenter_device: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kenter_device: Mapping is not page-sized"));
va = sva;
while (size != 0) {
l3 = pmap_l3(kernel_pmap, va);
KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
pn = (pa / PAGE_SIZE);
entry = PTE_KERN;
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l3, entry);
va += PAGE_SIZE;
pa += PAGE_SIZE;
size -= PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
}
/*
* Remove a page from the kernel pagetables.
* Note: not SMP coherent.
*/
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
pt_entry_t *l3;
l3 = pmap_l3(kernel_pmap, va);
KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
pmap_load_clear(l3);
sfence_vma();
}
void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
pt_entry_t *l3;
vm_offset_t va;
KASSERT((sva & L3_OFFSET) == 0,
("pmap_kremove_device: Invalid virtual address"));
KASSERT((size & PAGE_MASK) == 0,
("pmap_kremove_device: Mapping is not page-sized"));
va = sva;
while (size != 0) {
l3 = pmap_l3(kernel_pmap, va);
KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
pmap_load_clear(l3);
va += PAGE_SIZE;
size -= PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
}
/*
* Used to map a range of physical addresses into kernel
* virtual address space.
*
* The value passed in '*virt' is a suggested virtual address for
* the mapping. Architectures which can support a direct-mapped
* physical to virtual region can return the appropriate address
* within that region, leaving '*virt' unchanged. Other
* architectures should map the pages starting at '*virt' and
* update '*virt' with the first usable address after the mapped
* region.
*/
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
return PHYS_TO_DMAP(start);
}
/*
* Add a list of wired pages to the kva
* this routine is only used for temporary
* kernel mappings that do not need to have
* page modification or references recorded.
* Note that old mappings are simply written
* over. The page *must* be wired.
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
pt_entry_t *l3, pa;
vm_offset_t va;
vm_page_t m;
pt_entry_t entry;
pn_t pn;
int i;
va = sva;
for (i = 0; i < count; i++) {
m = ma[i];
pa = VM_PAGE_TO_PHYS(m);
pn = (pa / PAGE_SIZE);
l3 = pmap_l3(kernel_pmap, va);
entry = PTE_KERN;
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l3, entry);
va += L3_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
}
/*
* This routine tears out page mappings from the
* kernel -- it is meant only for temporary mappings.
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
pmap_qremove(vm_offset_t sva, int count)
{
pt_entry_t *l3;
vm_offset_t va;
KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
va = sva;
while (count-- > 0) {
l3 = pmap_l3(kernel_pmap, va);
KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));
pmap_load_clear(l3);
va += PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
}
/***************************************************
* Page table page management routines.....
***************************************************/
/*
* Schedule the specified unused page table page to be freed. Specifically,
* add the page to the specified list of pages that will be released to the
* physical memory manager after the TLB has been updated.
*/
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
boolean_t set_PG_ZERO)
{
if (set_PG_ZERO)
m->flags |= PG_ZERO;
else
m->flags &= ~PG_ZERO;
SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}
/*
* Decrements a page table page's wire count, which is used to record the
* number of valid page table entries within the page. If the wire count
* drops to zero, then the page table page is unmapped. Returns TRUE if the
* page table page was unmapped and FALSE otherwise.
*/
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
--m->wire_count;
if (m->wire_count == 0) {
_pmap_unwire_l3(pmap, va, m, free);
return (TRUE);
} else {
return (FALSE);
}
}
static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
vm_paddr_t phys;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* unmap the page table page
*/
if (m->pindex >= NUPDE) {
/* PD page */
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
pmap_load_clear(l1);
pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
} else {
/* PTE page */
pd_entry_t *l2;
l2 = pmap_l2(pmap, va);
pmap_load_clear(l2);
}
pmap_resident_count_dec(pmap, 1);
if (m->pindex < NUPDE) {
pd_entry_t *l1;
/* We just released a PT, unhold the matching PD */
vm_page_t pdpg;
l1 = pmap_l1(pmap, va);
phys = PTE_TO_PHYS(pmap_load(l1));
pdpg = PHYS_TO_VM_PAGE(phys);
pmap_unwire_l3(pmap, va, pdpg, free);
}
pmap_invalidate_page(pmap, va);
vm_wire_sub(1);
/*
* Put page on a list so that it is released after
* *ALL* TLB shootdown is done
*/
pmap_add_delayed_free_list(m, free, TRUE);
}
/*
* After removing an l3 entry, this routine is used to
* conditionally free the page, and manage the hold/wire counts.
*/
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
struct spglist *free)
{
vm_paddr_t phys;
vm_page_t mpte;
if (va >= VM_MAXUSER_ADDRESS)
return (0);
KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
phys = PTE_TO_PHYS(ptepde);
mpte = PHYS_TO_VM_PAGE(phys);
return (pmap_unwire_l3(pmap, va, mpte, free));
}
void
pmap_pinit0(pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
pmap->pm_l1 = kernel_pmap->pm_l1;
}
int
pmap_pinit(pmap_t pmap)
{
vm_paddr_t l1phys;
vm_page_t l1pt;
/*
* allocate the l1 page
*/
while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
vm_wait(NULL);
l1phys = VM_PAGE_TO_PHYS(l1pt);
pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
if ((l1pt->flags & PG_ZERO) == 0)
pagezero(pmap->pm_l1);
bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
/* Install kernel pagetables */
memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE);
/* Add to the list of all user pmaps */
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
return (1);
}
/*
* This routine is called if the desired page table page does not exist.
*
* If page table page allocation fails, this routine may sleep before
* returning NULL. It sleeps only if a lock pointer was given.
*
* Note: If a page allocation fails at page table level two or three,
* one or two pages may be held during the wait, only to be released
* afterwards. This conservative approach is easily argued to avoid
* race conditions.
*/
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
vm_page_t m, /*pdppg, */pdpg;
pt_entry_t entry;
vm_paddr_t phys;
pn_t pn;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
* Allocate a page table page.
*/
if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
if (lockp != NULL) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
rw_runlock(&pvh_global_lock);
vm_wait(NULL);
rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
/*
* Indicate the need to retry. While waiting, the page table
* page may have been allocated.
*/
return (NULL);
}
if ((m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
/*
* Map the pagetable page into the process address space, if
* it isn't already there.
*/
if (ptepindex >= NUPDE) {
pd_entry_t *l1;
vm_pindex_t l1index;
l1index = ptepindex - NUPDE;
l1 = &pmap->pm_l1[l1index];
pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
entry = (PTE_V);
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l1, entry);
pmap_distribute_l1(pmap, l1index, entry);
} else {
vm_pindex_t l1index;
pd_entry_t *l1, *l2;
l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
l1 = &pmap->pm_l1[l1index];
if (pmap_load(l1) == 0) {
/* recurse for allocating page dir */
if (_pmap_alloc_l3(pmap, NUPDE + l1index,
lockp) == NULL) {
vm_page_unwire_noq(m);
vm_page_free_zero(m);
return (NULL);
}
} else {
phys = PTE_TO_PHYS(pmap_load(l1));
pdpg = PHYS_TO_VM_PAGE(phys);
pdpg->wire_count++;
}
phys = PTE_TO_PHYS(pmap_load(l1));
l2 = (pd_entry_t *)PHYS_TO_DMAP(phys);
l2 = &l2[ptepindex & Ln_ADDR_MASK];
pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
entry = (PTE_V);
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l2, entry);
}
pmap_resident_count_inc(pmap, 1);
return (m);
}
static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t ptepindex;
pd_entry_t *l2;
vm_paddr_t phys;
vm_page_t m;
/*
* Calculate pagetable page index
*/
ptepindex = pmap_l2_pindex(va);
retry:
/*
* Get the page directory entry
*/
l2 = pmap_l2(pmap, va);
/*
* If the page table page is mapped, we just increment the
* hold count, and activate it.
*/
if (l2 != NULL && pmap_load(l2) != 0) {
phys = PTE_TO_PHYS(pmap_load(l2));
m = PHYS_TO_VM_PAGE(phys);
m->wire_count++;
} else {
/*
* Here if the pte page isn't mapped, or if it has been
* deallocated.
*/
m = _pmap_alloc_l3(pmap, ptepindex, lockp);
if (m == NULL && lockp != NULL)
goto retry;
}
return (m);
}
/***************************************************
* Pmap allocation/deallocation routines.
***************************************************/
/*
* Release any resources held by the given physical map.
* Called when a pmap initialized by pmap_pinit is being released.
* Should only be called if the map contains no valid mappings.
*/
void
pmap_release(pmap_t pmap)
{
vm_page_t m;
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
vm_page_unwire_noq(m);
vm_page_free_zero(m);
/* Remove pmap from the allpmaps list */
LIST_REMOVE(pmap, pm_list);
/* Remove kernel pagetables */
bzero(pmap->pm_l1, PAGE_SIZE);
}
#if 0
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
0, 0, kvm_size, "LU", "Size of KVM");
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
0, 0, kvm_free, "LU", "Amount of KVM free");
#endif /* 0 */
/*
* grow the number of kernel page table entries, if needed
*/
void
pmap_growkernel(vm_offset_t addr)
{
vm_paddr_t paddr;
vm_page_t nkpg;
pd_entry_t *l1, *l2;
pt_entry_t entry;
pn_t pn;
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
addr = roundup2(addr, L2_SIZE);
if (addr - 1 >= vm_map_max(kernel_map))
addr = vm_map_max(kernel_map);
while (kernel_vm_end < addr) {
l1 = pmap_l1(kernel_pmap, kernel_vm_end);
if (pmap_load(l1) == 0) {
/* We need a new PDP entry */
nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
if ((nkpg->flags & PG_ZERO) == 0)
pmap_zero_page(nkpg);
paddr = VM_PAGE_TO_PHYS(nkpg);
pn = (paddr / PAGE_SIZE);
entry = (PTE_V);
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l1, entry);
pmap_distribute_l1(kernel_pmap,
pmap_l1_index(kernel_vm_end), entry);
continue; /* try again */
}
l2 = pmap_l1_to_l2(l1, kernel_vm_end);
if ((pmap_load(l2) & PTE_V) != 0 &&
(pmap_load(l2) & PTE_RWX) == 0) {
kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
kernel_vm_end = vm_map_max(kernel_map);
break;
}
continue;
}
nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
if ((nkpg->flags & PG_ZERO) == 0) {
pmap_zero_page(nkpg);
}
paddr = VM_PAGE_TO_PHYS(nkpg);
pn = (paddr / PAGE_SIZE);
entry = (PTE_V);
entry |= (pn << PTE_PPN0_S);
pmap_load_store(l2, entry);
pmap_invalidate_page(kernel_pmap, kernel_vm_end);
kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
kernel_vm_end = vm_map_max(kernel_map);
break;
}
}
}
/***************************************************
* page management routines.
***************************************************/
CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{
return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}
#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
#define PC_FREE0 0xfffffffffffffffful
#define PC_FREE1 0xfffffffffffffffful
#define PC_FREE2 0x000000fffffffffful
static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
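/*
 * Illustrative consistency note (not in the original): a pv_chunk tracks
 * _NPCPV == 168 entries in three 64-bit maps as 64 + 64 + 40, which is
 * why PC_FREE2 has only its low 40 bits set.
 */
CTASSERT(_NPCPV == 64 + 64 + 40);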
#if 0
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
"Number of times tried to get a chunk page but failed.");
static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
"Current number of pv entry allocs");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
"Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
"Current number of spare pv entries");
#endif
#endif /* 0 */
/*
* We are in a serious low memory condition. Resort to
* drastic measures to free some pages so we can allocate
* another pv entry chunk.
*
* Returns NULL if PV entries were reclaimed from the specified pmap.
*
* We do not, however, unmap 2mpages because subsequent accesses will
* allocate per-page pv entries until repromotion occurs, thereby
* exacerbating the shortage of free pv entries.
*/
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
panic("RISCVTODO: reclaim_pv_chunk");
}
/*
* free the pv_entry back to the free list
*/
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
struct pv_chunk *pc;
int idx, field, bit;
rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_frees, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, 1));
PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
pc = pv_to_chunk(pv);
idx = pv - &pc->pc_pventry[0];
field = idx / 64;
bit = idx % 64;
pc->pc_map[field] |= 1ul << bit;
if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
pc->pc_map[2] != PC_FREE2) {
/* 98% of the time, pc is already at the head of the list. */
if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
}
return;
}
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
free_pv_chunk(pc);
}
static void
free_pv_chunk(struct pv_chunk *pc)
{
vm_page_t m;
mtx_lock(&pv_chunks_mutex);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
/* entire chunk is free, return it */
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
#if 0 /* TODO: For minidump */
dump_drop_page(m->phys_addr);
#endif
vm_page_unwire(m, PQ_NONE);
vm_page_free(m);
}
/*
* Returns a new PV entry, allocating a new PV chunk from the system when
* needed. If this PV chunk allocation fails and a PV list lock pointer was
* given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is
* returned.
*
* The given PV list lock may be released.
*/
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
vm_page_t m;
rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
pc = TAILQ_FIRST(&pmap->pm_pvchunk);
if (pc != NULL) {
for (field = 0; field < _NPCM; field++) {
if (pc->pc_map[field]) {
bit = ffsl(pc->pc_map[field]) - 1;
break;
}
}
if (field < _NPCM) {
pv = &pc->pc_pventry[field * 64 + bit];
pc->pc_map[field] &= ~(1ul << bit);
/* If this was the last item, move it to tail */
if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
pc->pc_map[2] == 0) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
pc_list);
}
PV_STAT(atomic_add_long(&pv_entry_count, 1));
PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
return (pv);
}
}
/* No free items, allocate another chunk */
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
if (lockp == NULL) {
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
m = reclaim_pv_chunk(pmap, lockp);
if (m == NULL)
goto retry;
}
PV_STAT(atomic_add_int(&pc_chunk_count, 1));
PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
#if 0 /* TODO: This is for minidump */
dump_add_page(m->phys_addr);
#endif
pc = (void *)PHYS_TO_DMAP(m->phys_addr);
pc->pc_pmap = pmap;
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
mtx_lock(&pv_chunks_mutex);
TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(atomic_add_long(&pv_entry_count, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
return (pv);
}
/*
* First find and then remove the pv entry for the specified pmap and virtual
* address from the specified pv list. Returns the pv entry if found and NULL
* otherwise. This operation can be performed on pv lists for either 4KB or
* 2MB page mappings.
*/
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
pv_entry_t pv;
rw_assert(&pvh_global_lock, RA_LOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
break;
}
}
return (pv);
}
/*
* First find and then destroy the pv entry for the specified pmap and virtual
* address. This operation can be performed on pv lists for either 4KB or 2MB
* page mappings.
*/
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
pv_entry_t pv;
pv = pmap_pvh_remove(pvh, pmap, va);
KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
free_pv_entry(pmap, pv);
}
/*
* Conditionally create the PV entry for a 4KB page mapping if the required
* memory can be allocated without resorting to reclamation.
*/
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct rwlock **lockp)
{
pv_entry_t pv;
rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
pv->pv_va = va;
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
return (TRUE);
} else
return (FALSE);
}
/*
* pmap_remove_l3: do the things to unmap a page in a process
*/
static int
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
pt_entry_t old_l3;
vm_paddr_t phys;
vm_page_t m;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
old_l3 = pmap_load_clear(l3);
pmap_invalidate_page(pmap, va);
if (old_l3 & PTE_SW_WIRED)
pmap->pm_stats.wired_count -= 1;
pmap_resident_count_dec(pmap, 1);
if (old_l3 & PTE_SW_MANAGED) {
phys = PTE_TO_PHYS(old_l3);
m = PHYS_TO_VM_PAGE(phys);
if (pmap_page_dirty(old_l3))
vm_page_dirty(m);
if (old_l3 & PTE_A)
vm_page_aflag_set(m, PGA_REFERENCED);
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
pmap_pvh_free(&m->md, pmap, va);
}
return (pmap_unuse_l3(pmap, va, l2e, free));
}
/*
* Remove the given range of addresses from the specified map.
*
* It is assumed that the start and end are properly
* rounded to the page size.
*/
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
struct rwlock *lock;
vm_offset_t va, va_next;
pd_entry_t *l1, *l2;
pt_entry_t l3_pte, *l3;
struct spglist free;
/*
* Perform an unsynchronized read. This is, however, safe.
*/
if (pmap->pm_stats.resident_count == 0)
return;
SLIST_INIT(&free);
rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
lock = NULL;
for (; sva < eva; sva = va_next) {
if (pmap->pm_stats.resident_count == 0)
break;
l1 = pmap_l1(pmap, sva);
if (pmap_load(l1) == 0) {
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
/*
* Calculate index for next page table.
*/
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
if (l2 == NULL)
continue;
l3_pte = pmap_load(l2);
/*
* Weed out invalid mappings.
*/
if (l3_pte == 0)
continue;
if ((pmap_load(l2) & PTE_RX) != 0)
continue;
/*
* Limit our scan to either the end of the va represented
* by the current page table page, or to the end of the
* range being removed.
*/
if (va_next > eva)
va_next = eva;
va = va_next;
for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
sva += L3_SIZE) {
if (l3 == NULL)
panic("l3 == NULL");
if (pmap_load(l3) == 0) {
if (va != va_next) {
pmap_invalidate_range(pmap, va, sva);
va = va_next;
}
continue;
}
if (va == va_next)
va = sva;
if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free,
&lock)) {
sva += L3_SIZE;
break;
}
}
if (va != va_next)
pmap_invalidate_range(pmap, va, sva);
}
if (lock != NULL)
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, false);
}
/*
* Routine: pmap_remove_all
* Function:
* Removes this physical page from
* all physical maps in which it resides.
* Reflects back modify bits to the pager.
*
* Notes:
* Original versions of this routine were very
* inefficient because they iteratively called
* pmap_remove (slow...)
*/
void
pmap_remove_all(vm_page_t m)
{
pv_entry_t pv;
pmap_t pmap;
pt_entry_t *l3, tl3;
pd_entry_t *l2, tl2;
struct spglist free;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
rw_wlock(&pvh_global_lock);
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pmap_resident_count_dec(pmap, 1);
l2 = pmap_l2(pmap, pv->pv_va);
KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
tl2 = pmap_load(l2);
KASSERT((tl2 & PTE_RX) == 0,
("pmap_remove_all: found a table when expecting "
"a block in %p's pv list", m));
l3 = pmap_l2_to_l3(l2, pv->pv_va);
tl3 = pmap_load_clear(l3);
pmap_invalidate_page(pmap, pv->pv_va);
if (tl3 & PTE_SW_WIRED)
pmap->pm_stats.wired_count--;
if ((tl3 & PTE_A) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
/*
* Update the vm_page_t clean and reference bits.
*/
if (pmap_page_dirty(tl3))
vm_page_dirty(m);
pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
free_pv_entry(pmap, pv);
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
rw_wunlock(&pvh_global_lock);
vm_page_free_pages_toq(&free, false);
}
/*
* Set the physical protection on the
* specified range of this map as requested.
*/
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
vm_offset_t va_next;
pd_entry_t *l1, *l2;
pt_entry_t *l3p, l3;
pt_entry_t entry;
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
return;
}
if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
return;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
l1 = pmap_l1(pmap, sva);
if (pmap_load(l1) == 0) {
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
if (l2 == NULL)
continue;
if (pmap_load(l2) == 0)
continue;
if ((pmap_load(l2) & PTE_RX) != 0)
continue;
if (va_next > eva)
va_next = eva;
for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
sva += L3_SIZE) {
l3 = pmap_load(l3p);
if (pmap_l3_valid(l3)) {
entry = pmap_load(l3p);
entry &= ~(PTE_W);
pmap_load_store(l3p, entry);
/* XXX: Use pmap_invalidate_range */
pmap_invalidate_page(pmap, sva);
}
}
}
PMAP_UNLOCK(pmap);
}
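/*
 * Attempt to resolve a fault by setting the accessed and/or dirty bits
 * in software: if the faulting access is permitted by an existing valid
 * mapping, set PTE_A (and PTE_D for a write fault) and return 1.
 * Return 0 if the fault must instead be handled by the VM system.
 */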
int
pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype)
{
pt_entry_t orig_l3;
pt_entry_t new_l3;
pt_entry_t *l3;
int rv;
rv = 0;
PMAP_LOCK(pmap);
l3 = pmap_l3(pmap, va);
if (l3 == NULL)
goto done;
orig_l3 = pmap_load(l3);
if ((orig_l3 & PTE_V) == 0 ||
(ftype == VM_PROT_WRITE && (orig_l3 & PTE_W) == 0) ||
(ftype == VM_PROT_EXECUTE && (orig_l3 & PTE_X) == 0) ||
(ftype == VM_PROT_READ && (orig_l3 & PTE_R) == 0))
goto done;
new_l3 = orig_l3 | PTE_A;
if (ftype == VM_PROT_WRITE)
new_l3 |= PTE_D;
if (orig_l3 != new_l3) {
pmap_load_store(l3, new_l3);
pmap_invalidate_page(pmap, va);
rv = 1;
goto done;
}
/*
* XXX: This case should never happen since it means
* the PTE shouldn't have resulted in a fault.
*/
done:
PMAP_UNLOCK(pmap);
return (rv);
}
/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
*
* If specified, the page will be wired down, meaning
* that the related pte can not be reclaimed.
*
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
u_int flags, int8_t psind __unused)
{
struct rwlock *lock;
pd_entry_t *l1, *l2;
pt_entry_t new_l3, orig_l3;
pt_entry_t *l3;
pv_entry_t pv;
vm_paddr_t opa, pa, l2_pa, l3_pa;
vm_page_t mpte, om, l2_m, l3_m;
boolean_t nosleep;
pt_entry_t entry;
pn_t l2_pn;
pn_t l3_pn;
pn_t pn;
va = trunc_page(va);
if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_LOCKED(m->object);
pa = VM_PAGE_TO_PHYS(m);
pn = (pa / PAGE_SIZE);
new_l3 = PTE_V | PTE_R | PTE_A;
if (prot & VM_PROT_EXECUTE)
new_l3 |= PTE_X;
if (flags & VM_PROT_WRITE)
new_l3 |= PTE_D;
if (prot & VM_PROT_WRITE)
new_l3 |= PTE_W;
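/* Addresses in the lower half of the address space are user mappings. */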
if ((va >> 63) == 0)
new_l3 |= PTE_U;
new_l3 |= (pn << PTE_PPN0_S);
if ((flags & PMAP_ENTER_WIRED) != 0)
new_l3 |= PTE_SW_WIRED;
/*
* Set modified bit gratuitously for writeable mappings if
* the page is unmanaged. We do not want to take a fault
* to do the dirty bit accounting for these mappings.
*/
if ((m->oflags & VPO_UNMANAGED) != 0) {
if (prot & VM_PROT_WRITE)
new_l3 |= PTE_D;
} else
new_l3 |= PTE_SW_MANAGED;
CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
mpte = NULL;
lock = NULL;
rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
if (va < VM_MAXUSER_ADDRESS) {
nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
if (mpte == NULL && nosleep) {
CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
if (lock != NULL)
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_RESOURCE_SHORTAGE);
}
l3 = pmap_l3(pmap, va);
} else {
l3 = pmap_l3(pmap, va);
/* TODO: This is not optimal, but should mostly work */
if (l3 == NULL) {
l2 = pmap_l2(pmap, va);
if (l2 == NULL) {
l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
VM_ALLOC_ZERO);
if (l2_m == NULL)
panic("pmap_enter: l2 pte_m == NULL");
if ((l2_m->flags & PG_ZERO) == 0)
pmap_zero_page(l2_m);
l2_pa = VM_PAGE_TO_PHYS(l2_m);
l2_pn = (l2_pa / PAGE_SIZE);
l1 = pmap_l1(pmap, va);
entry = (PTE_V);
entry |= (l2_pn << PTE_PPN0_S);
pmap_load_store(l1, entry);
pmap_distribute_l1(pmap, pmap_l1_index(va), entry);
l2 = pmap_l1_to_l2(l1, va);
}
KASSERT(l2 != NULL,
("No l2 table after allocating one"));
l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
if (l3_m == NULL)
panic("pmap_enter: l3 pte_m == NULL");
if ((l3_m->flags & PG_ZERO) == 0)
pmap_zero_page(l3_m);
l3_pa = VM_PAGE_TO_PHYS(l3_m);
l3_pn = (l3_pa / PAGE_SIZE);
entry = (PTE_V);
entry |= (l3_pn << PTE_PPN0_S);
pmap_load_store(l2, entry);
l3 = pmap_l2_to_l3(l2, va);
}
pmap_invalidate_page(pmap, va);
}
orig_l3 = pmap_load(l3);
opa = PTE_TO_PHYS(orig_l3);
pv = NULL;
/*
* Is the specified virtual address already mapped?
*/
if (pmap_l3_valid(orig_l3)) {
/*
* Wiring change, just update stats. We don't worry about
* wiring PT pages as they remain resident as long as there
* are valid mappings in them. Hence, if a user page is wired,
* the PT page will be also.
*/
if ((flags & PMAP_ENTER_WIRED) != 0 &&
(orig_l3 & PTE_SW_WIRED) == 0)
pmap->pm_stats.wired_count++;
else if ((flags & PMAP_ENTER_WIRED) == 0 &&
(orig_l3 & PTE_SW_WIRED) != 0)
pmap->pm_stats.wired_count--;
/*
* Remove the extra PT page reference.
*/
if (mpte != NULL) {
mpte->wire_count--;
KASSERT(mpte->wire_count > 0,
("pmap_enter: missing reference to page table page,"
" va: 0x%lx", va));
}
/*
* Has the physical page changed?
*/
if (opa == pa) {
/*
* No, might be a protection or wiring change.
*/
if ((orig_l3 & PTE_SW_MANAGED) != 0) {
if (pmap_is_write(new_l3))
vm_page_aflag_set(m, PGA_WRITEABLE);
}
goto validate;
}
/*
* The physical page has changed. Temporarily invalidate
* the mapping. This ensures that all threads sharing the
* pmap keep a consistent view of the mapping, which is
* necessary for the correct handling of COW faults. It
* also permits reuse of the old mapping's PV entry,
* avoiding an allocation.
*
* For consistency, handle unmanaged mappings the same way.
*/
orig_l3 = pmap_load_clear(l3);
KASSERT(PTE_TO_PHYS(orig_l3) == opa,
("pmap_enter: unexpected pa update for %#lx", va));
if ((orig_l3 & PTE_SW_MANAGED) != 0) {
om = PHYS_TO_VM_PAGE(opa);
/*
* The pmap lock is sufficient to synchronize with
* concurrent calls to pmap_page_test_mappings() and
* pmap_ts_referenced().
*/
if (pmap_page_dirty(orig_l3))
vm_page_dirty(om);
if ((orig_l3 & PTE_A) != 0)
vm_page_aflag_set(om, PGA_REFERENCED);
CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
pv = pmap_pvh_remove(&om->md, pmap, va);
if ((new_l3 & PTE_SW_MANAGED) == 0)
free_pv_entry(pmap, pv);
if ((om->aflags & PGA_WRITEABLE) != 0 &&
TAILQ_EMPTY(&om->md.pv_list))
vm_page_aflag_clear(om, PGA_WRITEABLE);
}
pmap_invalidate_page(pmap, va);
orig_l3 = 0;
} else {
/*
* Increment the counters.
*/
if ((new_l3 & PTE_SW_WIRED) != 0)
pmap->pm_stats.wired_count++;
pmap_resident_count_inc(pmap, 1);
}
/*
* Enter on the PV list if part of our managed memory.
*/
if ((new_l3 & PTE_SW_MANAGED) != 0) {
if (pv == NULL) {
pv = get_pv_entry(pmap, &lock);
pv->pv_va = va;
}
CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if (pmap_is_write(new_l3))
vm_page_aflag_set(m, PGA_WRITEABLE);
}
validate:
/*
* Sync the i-cache on all harts before updating the PTE
* if the new PTE is executable.
*/
if (prot & VM_PROT_EXECUTE)
pmap_sync_icache(pmap, va, PAGE_SIZE);
/*
* Update the L3 entry.
*/
if (orig_l3 != 0) {
orig_l3 = pmap_load_store(l3, new_l3);
pmap_invalidate_page(pmap, va);
KASSERT(PTE_TO_PHYS(orig_l3) == pa,
("pmap_enter: invalid update"));
if (pmap_page_dirty(orig_l3) &&
(orig_l3 & PTE_SW_MANAGED) != 0)
vm_page_dirty(m);
} else {
pmap_load_store(l3, new_l3);
}
if (lock != NULL)
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_SUCCESS);
}
/*
* Maps a sequence of resident pages belonging to the same object.
* The sequence begins with the given page m_start. This page is
* mapped at the given virtual address start. Each subsequent page is
* mapped at a virtual address that is offset from start by the same
* amount as the page is offset from m_start within the object. The
* last page in the sequence is the page with the largest offset from
* m_start that can be mapped at a virtual address less than the given
* virtual address end. Not every virtual page between start and end
* is mapped; only those for which a resident page exists with the
* corresponding offset from m_start are mapped.
*/
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
struct rwlock *lock;
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
VM_OBJECT_ASSERT_LOCKED(m_start->object);
psize = atop(end - start);
mpte = NULL;
m = m_start;
lock = NULL;
rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
m = TAILQ_NEXT(m, listq);
}
if (lock != NULL)
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
/*
* This code makes some *MAJOR* assumptions:
* 1. The current pmap and the given pmap exist.
* 2. The mapping is not wired.
* 3. Only read access is required.
* 4. No page table pages are needed.
* It is *MUCH* faster than pmap_enter...
*/
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
struct rwlock *lock;
lock = NULL;
rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
{
struct spglist free;
vm_paddr_t phys;
pd_entry_t *l2;
pt_entry_t *l3;
vm_paddr_t pa;
pt_entry_t entry;
pn_t pn;
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
/*
* In the case that a page table page is not
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
vm_pindex_t l2pindex;
/*
* Calculate the page table page index.
*/
l2pindex = pmap_l2_pindex(va);
if (mpte && (mpte->pindex == l2pindex)) {
mpte->wire_count++;
} else {
/*
* Get the l2 entry
*/
l2 = pmap_l2(pmap, va);
/*
* If the page table page is mapped, we just increment
* the hold count, and activate it. Otherwise, we
* attempt to allocate a page table page. If this
* attempt fails, we don't retry. Instead, we give up.
*/
if (l2 != NULL && pmap_load(l2) != 0) {
phys = PTE_TO_PHYS(pmap_load(l2));
mpte = PHYS_TO_VM_PAGE(phys);
mpte->wire_count++;
} else {
/*
* Pass NULL instead of the PV list lock
* pointer, because we don't intend to sleep.
*/
mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
if (mpte == NULL)
return (mpte);
}
}
l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
l3 = &l3[pmap_l3_index(va)];
} else {
mpte = NULL;
l3 = pmap_l3(kernel_pmap, va);
}
if (l3 == NULL)
panic("pmap_enter_quick_locked: No l3");
if (pmap_load(l3) != 0) {
if (mpte != NULL) {
mpte->wire_count--;
mpte = NULL;
}
return (mpte);
}
/*
* Enter on the PV list if part of our managed memory.
*/
if ((m->oflags & VPO_UNMANAGED) == 0 &&
!pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
if (mpte != NULL) {
SLIST_INIT(&free);
if (pmap_unwire_l3(pmap, va, mpte, &free)) {
pmap_invalidate_page(pmap, va);
vm_page_free_pages_toq(&free, false);
}
mpte = NULL;
}
return (mpte);
}
/*
* Increment counters
*/
pmap_resident_count_inc(pmap, 1);
pa = VM_PAGE_TO_PHYS(m);
pn = (pa / PAGE_SIZE);
entry = PTE_V | PTE_R;
if (prot & VM_PROT_EXECUTE)
entry |= PTE_X;
entry |= (pn << PTE_PPN0_S);
/*
* Now validate mapping with RO protection
*/
if ((m->oflags & VPO_UNMANAGED) == 0)
entry |= PTE_SW_MANAGED;
/*
* Sync the i-cache on all harts before updating the PTE
* if the new PTE is executable.
*/
if (prot & VM_PROT_EXECUTE)
pmap_sync_icache(pmap, va, PAGE_SIZE);
pmap_load_store(l3, entry);
pmap_invalidate_page(pmap, va);
return (mpte);
}
/*
* This code maps large physical mmap regions into the
* processor address space. Note that some shortcuts
* are taken, but the code works.
*/
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
vm_pindex_t pindex, vm_size_t size)
{
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
("pmap_object_init_pt: non-device object"));
}
/*
* Clear the wired attribute from the mappings for the specified range of
* addresses in the given pmap. Every valid mapping within that range
* must have the wired attribute set. In contrast, invalid mappings
* cannot have the wired attribute set, so they are ignored.
*
* The wired attribute of the page table entry is not a hardware feature,
* so there is no need to invalidate any TLB entries.
*/
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t va_next;
pd_entry_t *l1, *l2;
pt_entry_t *l3;
boolean_t pv_lists_locked;
pv_lists_locked = FALSE;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
l1 = pmap_l1(pmap, sva);
if (pmap_load(l1) == 0) {
va_next = (sva + L1_SIZE) & ~L1_OFFSET;
if (va_next < sva)
va_next = eva;
continue;
}
va_next = (sva + L2_SIZE) & ~L2_OFFSET;
if (va_next < sva)
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
if (pmap_load(l2) == 0)
continue;
if (va_next > eva)
va_next = eva;
for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
sva += L3_SIZE) {
if (pmap_load(l3) == 0)
continue;
if ((pmap_load(l3) & PTE_SW_WIRED) == 0)
panic("pmap_unwire: l3 %#jx is missing "
"PTE_SW_WIRED", (uintmax_t)*l3);
/*
* PTE_SW_WIRED must be cleared atomically. Although the pmap
* lock synchronizes access to PTE_SW_WIRED, another processor
* could be setting PTE_D and/or PTE_A concurrently.
*/
atomic_clear_long(l3, PTE_SW_WIRED);
pmap->pm_stats.wired_count--;
}
}
if (pv_lists_locked)
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
*
* This routine is only advisory and need not do anything.
*/
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
}
/*
* pmap_zero_page zeros the specified hardware page by mapping
* the page into KVM and using bzero to clear its contents.
*/
void
pmap_zero_page(vm_page_t m)
{
vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
pagezero((void *)va);
}
/*
* pmap_zero_page_area zeros the specified hardware page by mapping
* the page into KVM and using bzero to clear its contents.
*
* off and size may not cover an area beyond a single hardware page.
*/
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
if (off == 0 && size == PAGE_SIZE)
pagezero((void *)va);
else
bzero((char *)va + off, size);
}
/*
* pmap_copy_page copies the specified (machine independent)
* page by mapping the page into virtual memory and using
* bcopy to copy the page, one machine dependent page at a
* time.
*/
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
pagecopy((void *)src, (void *)dst);
}
int unmapped_buf_allowed = 1;
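/*
 * Copy xfersize bytes between the given page lists, starting at the given
 * offsets, using their direct map addresses.
 */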
void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
vm_offset_t b_offset, int xfersize)
{
void *a_cp, *b_cp;
vm_page_t m_a, m_b;
vm_paddr_t p_a, p_b;
vm_offset_t a_pg_offset, b_pg_offset;
int cnt;
while (xfersize > 0) {
a_pg_offset = a_offset & PAGE_MASK;
m_a = ma[a_offset >> PAGE_SHIFT];
p_a = m_a->phys_addr;
b_pg_offset = b_offset & PAGE_MASK;
m_b = mb[b_offset >> PAGE_SHIFT];
p_b = m_b->phys_addr;
cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
cnt = min(cnt, PAGE_SIZE - b_pg_offset);
if (__predict_false(!PHYS_IN_DMAP(p_a))) {
panic("!DMAP a %lx", p_a);
} else {
a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
}
if (__predict_false(!PHYS_IN_DMAP(p_b))) {
panic("!DMAP b %lx", p_b);
} else {
b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
}
bcopy(a_cp, b_cp, cnt);
a_offset += cnt;
b_offset += cnt;
xfersize -= cnt;
}
}
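/*
 * A page is "quick mapped" by returning its direct map address, so
 * pmap_quick_remove_page() has nothing to undo.
 */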
vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{
return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
}
void
pmap_quick_remove_page(vm_offset_t addr)
{
}
/*
* Returns true if the pmap's pv is one of the first
* 16 pvs linked to from this page. This count may
* be changed upwards or downwards in the future; it
* is only necessary that true be returned for a small
* subset of pmaps for proper page aging.
*/
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
struct rwlock *lock;
pv_entry_t pv;
int loops = 0;
boolean_t rv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
break;
}
loops++;
if (loops >= 16)
break;
}
rw_runlock(lock);
rw_runlock(&pvh_global_lock);
return (rv);
}
/*
* pmap_page_wired_mappings:
*
* Return the number of managed mappings to the given physical page
* that are wired.
*/
int
pmap_page_wired_mappings(vm_page_t m)
{
struct rwlock *lock;
pmap_t pmap;
pt_entry_t *l3;
pv_entry_t pv;
int count, md_gen;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
count = 0;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
rw_runlock(lock);
PMAP_LOCK(pmap);
rw_rlock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
l3 = pmap_l3(pmap, pv->pv_va);
if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0)
count++;
PMAP_UNLOCK(pmap);
}
rw_runlock(lock);
rw_runlock(&pvh_global_lock);
return (count);
}
/*
* Destroy all managed, non-wired mappings in the given user-space
* pmap. This pmap cannot be active on any processor besides the
* caller.
*
* This function cannot be applied to the kernel pmap. Moreover, it
* is not intended for general use. It is only to be used during
* process termination. Consequently, it can be implemented in ways
* that make it faster than pmap_remove(). First, it can more quickly
* destroy mappings by iterating over the pmap's collection of PV
* entries, rather than searching the page table. Second, it doesn't
* have to test and clear the page table entries atomically, because
* no processor is currently accessing the user address space. In
* particular, a page table entry's dirty bit won't change state once
* this function starts.
*/
void
pmap_remove_pages(pmap_t pmap)
{
pd_entry_t ptepde, *l2;
pt_entry_t *l3, tl3;
struct spglist free;
vm_page_t m;
pv_entry_t pv;
struct pv_chunk *pc, *npc;
struct rwlock *lock;
int64_t bit;
uint64_t inuse, bitmask;
int allfree, field, freed, idx;
vm_paddr_t pa;
lock = NULL;
SLIST_INIT(&free);
rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
freed = 0;
for (field = 0; field < _NPCM; field++) {
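/*
 * A set bit in pc_map marks a free PV entry; invert to find
 * the entries that are in use in this field of the chunk.
 */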
inuse = ~pc->pc_map[field] & pc_freemask[field];
while (inuse != 0) {
bit = ffsl(inuse) - 1;
bitmask = 1UL << bit;
idx = field * 64 + bit;
pv = &pc->pc_pventry[idx];
inuse &= ~bitmask;
l2 = pmap_l2(pmap, pv->pv_va);
ptepde = pmap_load(l2);
l3 = pmap_l2_to_l3(l2, pv->pv_va);
tl3 = pmap_load(l3);
/*
* We cannot remove wired pages from a process' mapping
* at this time.
*/
if (tl3 & PTE_SW_WIRED) {
allfree = 0;
continue;
}
pa = PTE_TO_PHYS(tl3);
m = PHYS_TO_VM_PAGE(pa);
KASSERT(m->phys_addr == pa,
("vm_page_t %p phys_addr mismatch %016jx %016jx",
m, (uintmax_t)m->phys_addr,
(uintmax_t)tl3));
KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
m < &vm_page_array[vm_page_array_size],
("pmap_remove_pages: bad l3 %#jx",
(uintmax_t)tl3));
pmap_load_clear(l3);
pmap_invalidate_page(pmap, pv->pv_va);
/*
* Update the vm_page_t clean/reference bits.
*/
if (pmap_page_dirty(tl3))
vm_page_dirty(m);
CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
/* Mark free */
pc->pc_map[field] |= bitmask;
pmap_resident_count_dec(pmap, 1);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
freed++;
}
}
PV_STAT(atomic_add_long(&pv_entry_frees, freed));
PV_STAT(atomic_add_int(&pv_entry_spare, freed));
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
free_pv_chunk(pc);
}
}
pmap_invalidate_all(pmap);
if (lock != NULL)
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, false);
}
/*
* This is used to check if a page has been accessed or modified,
* as indicated by the PTE_A and PTE_D bits in the page's mappings.
*/
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
struct rwlock *lock;
pv_entry_t pv;
pt_entry_t *l3, mask, value;
pmap_t pmap;
int md_gen;
boolean_t rv;
rv = FALSE;
rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
rw_runlock(lock);
PMAP_LOCK(pmap);
rw_rlock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
}
}
l3 = pmap_l3(pmap, pv->pv_va);
mask = 0;
value = 0;
if (modified) {
mask |= PTE_D;
value |= PTE_D;
}
if (accessed) {
mask |= PTE_A;
value |= PTE_A;
}
#if 0
if (modified) {
mask |= ATTR_AP_RW_BIT;
value |= ATTR_AP(ATTR_AP_RW);
}
if (accessed) {
mask |= ATTR_AF | ATTR_DESCR_MASK;
value |= ATTR_AF | L3_PAGE;
}
#endif
rv = (pmap_load(l3) & mask) == value;
PMAP_UNLOCK(pmap);
if (rv)
goto out;
}
out:
rw_runlock(lock);
rw_runlock(&pvh_global_lock);
return (rv);
}
/*
* pmap_is_modified:
*
* Return whether or not the specified physical page was modified
* in any physical maps.
*/
boolean_t
pmap_is_modified(vm_page_t m)
{
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_modified: page %p is not managed", m));
/*
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* concurrently set while the object is locked. Thus, if PGA_WRITEABLE
* is clear, no PTEs can have PG_M set.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return (FALSE);
return (pmap_page_test_mappings(m, FALSE, TRUE));
}
/*
* pmap_is_prefaultable:
*
* Return whether or not the specified virtual address is eligible
* for prefault.
*/
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
pt_entry_t *l3;
boolean_t rv;
rv = FALSE;
PMAP_LOCK(pmap);
l3 = pmap_l3(pmap, addr);
if (l3 != NULL && pmap_load(l3) != 0) {
rv = TRUE;
}
PMAP_UNLOCK(pmap);
return (rv);
}
/*
* pmap_is_referenced:
*
* Return whether or not the specified physical page was referenced
* in any physical maps.
*/
boolean_t
pmap_is_referenced(vm_page_t m)
{
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_referenced: page %p is not managed", m));
return (pmap_page_test_mappings(m, TRUE, FALSE));
}
/*
* Clear the write and modified bits in each of the given page's mappings.
*/
void
pmap_remove_write(vm_page_t m)
{
pmap_t pmap;
struct rwlock *lock;
pv_entry_t pv;
pt_entry_t *l3, oldl3;
pt_entry_t newl3;
int md_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_write: page %p is not managed", m));
/*
* If the page is not exclusive busied, then PGA_WRITEABLE cannot be
* set by another thread while the object is locked. Thus,
* if PGA_WRITEABLE is clear, no page table entries need updating.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
retry_pv_loop:
rw_wlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
rw_wunlock(lock);
goto retry_pv_loop;
}
}
l3 = pmap_l3(pmap, pv->pv_va);
retry:
oldl3 = pmap_load(l3);
if (pmap_is_write(oldl3)) {
newl3 = oldl3 & ~(PTE_W);
if (!atomic_cmpset_long(l3, oldl3, newl3))
goto retry;
/* TODO: use pmap_page_dirty(oldl3) ? */
if ((oldl3 & PTE_A) != 0)
vm_page_dirty(m);
pmap_invalidate_page(pmap, pv->pv_va);
}
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
rw_runlock(&pvh_global_lock);
}
static __inline boolean_t
safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
{
return (FALSE);
}
/*
* pmap_ts_referenced:
*
* Return a count of reference bits for a page, clearing those bits.
* It is not necessary for every reference bit to be cleared, but it
* is necessary that 0 only be returned when there are truly no
* reference bits set.
*
* As an optimization, update the page's dirty field if a modified bit is
* found while counting reference bits. This opportunistic update can be
* performed at low cost and can eliminate the need for some future calls
* to pmap_is_modified(). However, since this function stops after
* finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
* dirty pages. Those dirty pages will only be detected by a future call
* to pmap_is_modified().
*/
int
pmap_ts_referenced(vm_page_t m)
{
pv_entry_t pv, pvf;
pmap_t pmap;
struct rwlock *lock;
pd_entry_t *l2;
pt_entry_t *l3, old_l3;
vm_paddr_t pa;
int cleared, md_gen, not_cleared;
struct spglist free;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
SLIST_INIT(&free);
cleared = 0;
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
rw_rlock(&pvh_global_lock);
rw_wlock(lock);
retry:
not_cleared = 0;
if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
goto out;
pv = pvf;
do {
if (pvf == NULL)
pvf = pv;
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
rw_wunlock(lock);
PMAP_LOCK(pmap);
rw_wlock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
}
}
l2 = pmap_l2(pmap, pv->pv_va);
KASSERT((pmap_load(l2) & PTE_RX) == 0,
("pmap_ts_referenced: found an invalid l2 table"));
l3 = pmap_l2_to_l3(l2, pv->pv_va);
old_l3 = pmap_load(l3);
if (pmap_page_dirty(old_l3))
vm_page_dirty(m);
if ((old_l3 & PTE_A) != 0) {
if (safe_to_clear_referenced(pmap, old_l3)) {
/*
* TODO: We don't handle the access flag
* at all. We need to be able to set it in
* the exception handler.
*/
panic("RISCVTODO: safe_to_clear_referenced\n");
} else if ((old_l3 & PTE_SW_WIRED) == 0) {
/*
* Wired pages cannot be paged out so
* doing accessed bit emulation for
* them is wasted effort. We do the
* hard work for unwired pages only.
*/
pmap_remove_l3(pmap, l3, pv->pv_va,
pmap_load(l2), &free, &lock);
pmap_invalidate_page(pmap, pv->pv_va);
cleared++;
if (pvf == pv)
pvf = NULL;
pv = NULL;
KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
("inconsistent pv lock %p %p for page %p",
lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
} else
not_cleared++;
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
}
} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
rw_runlock(&pvh_global_lock);
vm_page_free_pages_toq(&free, false);
return (cleared + not_cleared);
}
/*
* Apply the given advice to the specified range of addresses within the
* given pmap. Depending on the advice, clear the referenced and/or
* modified flags in each mapping and set the mapped page's dirty field.
*/
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
}
/*
* Clear the modify bits on the specified physical page.
*/
void
pmap_clear_modify(vm_page_t m)
{
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT(!vm_page_xbusied(m),
("pmap_clear_modify: page %p is exclusive busied", m));
/*
* If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
* If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
/* RISCVTODO: We lack support for tracking if a page is modified */
}
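/*
 * Map a range of physical memory via the direct map;
 * pmap_unmapbios() consequently has nothing to undo.
 */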
void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{
return ((void *)PHYS_TO_DMAP(pa));
}
void
pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
{
}
/*
* Sets the memory attribute for the specified page.
*/
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
m->md.pv_memattr = ma;
/*
* RISCVTODO: Implement the below (from the amd64 pmap)
* If "m" is a normal page, update its direct mapping. This update
* can be relied upon to perform any cache operations that are
* required for data coherence.
*/
if ((m->flags & PG_FICTITIOUS) == 0 &&
PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
panic("RISCVTODO: pmap_page_set_memattr");
}
/*
* perform the pmap work for mincore
*/
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
pt_entry_t *l2, *l3, tpte;
vm_paddr_t pa;
int val;
bool managed;
PMAP_LOCK(pmap);
retry:
managed = false;
val = 0;
l2 = pmap_l2(pmap, addr);
if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) {
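/*
 * An L2 entry with any of the R/W/X bits set is a leaf PTE,
 * i.e. a 2MB superpage mapping.
 */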
if ((tpte & (PTE_R | PTE_W | PTE_X)) != 0) {
pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET);
val = MINCORE_INCORE | MINCORE_SUPER;
} else {
l3 = pmap_l2_to_l3(l2, addr);
tpte = pmap_load(l3);
if ((tpte & PTE_V) == 0)
goto done;
pa = PTE_TO_PHYS(tpte) | (addr & L3_OFFSET);
val = MINCORE_INCORE;
}
if (pmap_page_dirty(tpte))
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
if (pmap_page_accessed(tpte))
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
managed = (tpte & PTE_SW_MANAGED) == PTE_SW_MANAGED;
}
done:
if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
(MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
goto retry;
} else
PA_UNLOCK_COND(*locked_pa);
PMAP_UNLOCK(pmap);
return (val);
}
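/*
 * Activate the given thread's address space: write SATP with the Sv39
 * mode and the physical page number of the pmap's L1 (root) page table,
 * then invalidate the stale TLB entries.
 */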
void
pmap_activate(struct thread *td)
{
pmap_t pmap;
uint64_t reg;
critical_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
reg = SATP_MODE_SV39;
reg |= (td->td_pcb->pcb_l1addr >> PAGE_SHIFT);
- __asm __volatile("csrw sptbr, %0" :: "r"(reg));
+ load_satp(reg);
pmap_invalidate_all(pmap);
critical_exit();
}
void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
cpuset_t mask;
/*
* From the RISC-V User-Level ISA V2.2:
*
* "To make a store to instruction memory visible to all
* RISC-V harts, the writing hart has to execute a data FENCE
* before requesting that all remote RISC-V harts execute a
* FENCE.I."
*/
sched_pin();
mask = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &mask);
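/*
 * Execute the data FENCE locally, then have the SBI firmware execute
 * FENCE.I on all remote harts.
 */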
fence();
sbi_remote_fence_i(mask.__bits);
sched_unpin();
}
/*
* Increase the starting virtual address of the given mapping if a
* different alignment might result in more superpage mappings.
*/
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
vm_offset_t *addr, vm_size_t size)
{
}
/**
* Get the kernel virtual address of a set of physical pages. If there are
* physical addresses not covered by the DMAP perform a transient mapping
* that will be removed when calling pmap_unmap_io_transient.
*
* \param page The pages for which the caller wishes to obtain
* kernel virtual addresses.
* \param vaddr On return contains the kernel virtual memory address
* of the pages passed in the page parameter.
* \param count Number of pages passed in.
* \param can_fault TRUE if the thread using the mapped pages can take
* page faults, FALSE otherwise.
*
* \returns TRUE if the caller must call pmap_unmap_io_transient when
* finished or FALSE otherwise.
*
*/
boolean_t
pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
boolean_t can_fault)
{
vm_paddr_t paddr;
boolean_t needs_mapping;
int error, i;
/*
* Allocate any KVA space that we need, this is done in a separate
* loop to prevent calling vmem_alloc while pinned.
*/
needs_mapping = FALSE;
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
error = vmem_alloc(kernel_arena, PAGE_SIZE,
M_BESTFIT | M_WAITOK, &vaddr[i]);
KASSERT(error == 0, ("vmem_alloc failed: %d", error));
needs_mapping = TRUE;
} else {
vaddr[i] = PHYS_TO_DMAP(paddr);
}
}
/* Exit early if everything is covered by the DMAP */
if (!needs_mapping)
return (FALSE);
if (!can_fault)
sched_pin();
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (paddr >= DMAP_MAX_PHYSADDR) {
panic(
"pmap_map_io_transient: TODO: Map out of DMAP data");
}
}
return (needs_mapping);
}
void
pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
boolean_t can_fault)
{
vm_paddr_t paddr;
int i;
if (!can_fault)
sched_unpin();
for (i = 0; i < count; i++) {
paddr = VM_PAGE_TO_PHYS(page[i]);
if (paddr >= DMAP_MAX_PHYSADDR) {
panic("RISCVTODO: pmap_unmap_io_transient: Unmap data");
}
}
}
boolean_t
pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
{
return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK);
}
Index: head/sys/riscv/riscv/swtch.S
===================================================================
--- head/sys/riscv/riscv/swtch.S (revision 341692)
+++ head/sys/riscv/riscv/swtch.S (revision 341693)
@@ -1,494 +1,494 @@
/*-
* Copyright (c) 2015-2017 Ruslan Bukin
* All rights reserved.
*
* Portions of this software were developed by SRI International and the
* University of Cambridge Computer Laboratory under DARPA/AFRL contract
* FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
*
* Portions of this software were developed by the University of Cambridge
* Computer Laboratory as part of the CTSRD Project, with support from the
* UK Higher Education Innovation Fund (HEIF).
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "assym.inc"
#include "opt_sched.h"
#include
#include
#include
#include
__FBSDID("$FreeBSD$");
#ifdef FPE
.macro __fpe_state_save p
/*
* Enable FPE usage in supervisor mode,
* so we can access registers.
*/
li t0, SSTATUS_FS_INITIAL
csrs sstatus, t0
/* Store registers */
frcsr t0
sd t0, (PCB_FCSR)(\p)
fsd f0, (PCB_X + 0 * 16)(\p)
fsd f1, (PCB_X + 1 * 16)(\p)
fsd f2, (PCB_X + 2 * 16)(\p)
fsd f3, (PCB_X + 3 * 16)(\p)
fsd f4, (PCB_X + 4 * 16)(\p)
fsd f5, (PCB_X + 5 * 16)(\p)
fsd f6, (PCB_X + 6 * 16)(\p)
fsd f7, (PCB_X + 7 * 16)(\p)
fsd f8, (PCB_X + 8 * 16)(\p)
fsd f9, (PCB_X + 9 * 16)(\p)
fsd f10, (PCB_X + 10 * 16)(\p)
fsd f11, (PCB_X + 11 * 16)(\p)
fsd f12, (PCB_X + 12 * 16)(\p)
fsd f13, (PCB_X + 13 * 16)(\p)
fsd f14, (PCB_X + 14 * 16)(\p)
fsd f15, (PCB_X + 15 * 16)(\p)
fsd f16, (PCB_X + 16 * 16)(\p)
fsd f17, (PCB_X + 17 * 16)(\p)
fsd f18, (PCB_X + 18 * 16)(\p)
fsd f19, (PCB_X + 19 * 16)(\p)
fsd f20, (PCB_X + 20 * 16)(\p)
fsd f21, (PCB_X + 21 * 16)(\p)
fsd f22, (PCB_X + 22 * 16)(\p)
fsd f23, (PCB_X + 23 * 16)(\p)
fsd f24, (PCB_X + 24 * 16)(\p)
fsd f25, (PCB_X + 25 * 16)(\p)
fsd f26, (PCB_X + 26 * 16)(\p)
fsd f27, (PCB_X + 27 * 16)(\p)
fsd f28, (PCB_X + 28 * 16)(\p)
fsd f29, (PCB_X + 29 * 16)(\p)
fsd f30, (PCB_X + 30 * 16)(\p)
fsd f31, (PCB_X + 31 * 16)(\p)
/* Disable FPE usage in supervisor mode. */
li t0, SSTATUS_FS_MASK
csrc sstatus, t0
.endm
.macro __fpe_state_load p
/*
* Enable FPE usage in supervisor mode,
* so we can access registers.
*/
li t0, SSTATUS_FS_INITIAL
csrs sstatus, t0
/* Restore registers */
ld t0, (PCB_FCSR)(\p)
fscsr t0
fld f0, (PCB_X + 0 * 16)(\p)
fld f1, (PCB_X + 1 * 16)(\p)
fld f2, (PCB_X + 2 * 16)(\p)
fld f3, (PCB_X + 3 * 16)(\p)
fld f4, (PCB_X + 4 * 16)(\p)
fld f5, (PCB_X + 5 * 16)(\p)
fld f6, (PCB_X + 6 * 16)(\p)
fld f7, (PCB_X + 7 * 16)(\p)
fld f8, (PCB_X + 8 * 16)(\p)
fld f9, (PCB_X + 9 * 16)(\p)
fld f10, (PCB_X + 10 * 16)(\p)
fld f11, (PCB_X + 11 * 16)(\p)
fld f12, (PCB_X + 12 * 16)(\p)
fld f13, (PCB_X + 13 * 16)(\p)
fld f14, (PCB_X + 14 * 16)(\p)
fld f15, (PCB_X + 15 * 16)(\p)
fld f16, (PCB_X + 16 * 16)(\p)
fld f17, (PCB_X + 17 * 16)(\p)
fld f18, (PCB_X + 18 * 16)(\p)
fld f19, (PCB_X + 19 * 16)(\p)
fld f20, (PCB_X + 20 * 16)(\p)
fld f21, (PCB_X + 21 * 16)(\p)
fld f22, (PCB_X + 22 * 16)(\p)
fld f23, (PCB_X + 23 * 16)(\p)
fld f24, (PCB_X + 24 * 16)(\p)
fld f25, (PCB_X + 25 * 16)(\p)
fld f26, (PCB_X + 26 * 16)(\p)
fld f27, (PCB_X + 27 * 16)(\p)
fld f28, (PCB_X + 28 * 16)(\p)
fld f29, (PCB_X + 29 * 16)(\p)
fld f30, (PCB_X + 30 * 16)(\p)
fld f31, (PCB_X + 31 * 16)(\p)
/* Disable FPE usage in supervisor mode. */
li t0, SSTATUS_FS_MASK
csrc sstatus, t0
.endm
/*
* void
* fpe_state_save(struct thread *td)
*/
ENTRY(fpe_state_save)
/* Get pointer to PCB */
ld a0, TD_PCB(a0)
__fpe_state_save a0
ret
END(fpe_state_save)
#endif /* FPE */
/*
* void
* fpe_state_clear(void)
*/
ENTRY(fpe_state_clear)
/*
* Enable FPE usage in supervisor mode,
* so we can access registers.
*/
li t0, SSTATUS_FS_INITIAL
csrs sstatus, t0
fscsr zero
fcvt.d.l f0, zero
fcvt.d.l f1, zero
fcvt.d.l f2, zero
fcvt.d.l f3, zero
fcvt.d.l f4, zero
fcvt.d.l f5, zero
fcvt.d.l f6, zero
fcvt.d.l f7, zero
fcvt.d.l f8, zero
fcvt.d.l f9, zero
fcvt.d.l f10, zero
fcvt.d.l f11, zero
fcvt.d.l f12, zero
fcvt.d.l f13, zero
fcvt.d.l f14, zero
fcvt.d.l f15, zero
fcvt.d.l f16, zero
fcvt.d.l f17, zero
fcvt.d.l f18, zero
fcvt.d.l f19, zero
fcvt.d.l f20, zero
fcvt.d.l f21, zero
fcvt.d.l f22, zero
fcvt.d.l f23, zero
fcvt.d.l f24, zero
fcvt.d.l f25, zero
fcvt.d.l f26, zero
fcvt.d.l f27, zero
fcvt.d.l f28, zero
fcvt.d.l f29, zero
fcvt.d.l f30, zero
fcvt.d.l f31, zero
/* Disable FPE usage in supervisor mode. */
li t0, SSTATUS_FS_MASK
csrc sstatus, t0
ret
END(fpe_state_clear)
/*
* void cpu_throw(struct thread *old, struct thread *new)
*/
ENTRY(cpu_throw)
/* Store the new curthread */
sd a1, PC_CURTHREAD(gp)
/* And the new pcb */
ld x13, TD_PCB(a1)
sd x13, PC_CURPCB(gp)
sfence.vma
/* Switch to the new pmap */
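/* satp = SV39 mode | (L1 page table physical address >> PAGE_SHIFT) */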
ld t0, PCB_L1ADDR(x13)
srli t0, t0, PAGE_SHIFT
li t1, SATP_MODE_SV39
or t0, t0, t1
- csrw sptbr, t0
+ csrw satp, t0
/* TODO: Invalidate the TLB */
sfence.vma
/* Load registers */
ld ra, (PCB_RA)(x13)
ld sp, (PCB_SP)(x13)
ld tp, (PCB_TP)(x13)
/* s[0-11] */
ld s0, (PCB_S + 0 * 8)(x13)
ld s1, (PCB_S + 1 * 8)(x13)
ld s2, (PCB_S + 2 * 8)(x13)
ld s3, (PCB_S + 3 * 8)(x13)
ld s4, (PCB_S + 4 * 8)(x13)
ld s5, (PCB_S + 5 * 8)(x13)
ld s6, (PCB_S + 6 * 8)(x13)
ld s7, (PCB_S + 7 * 8)(x13)
ld s8, (PCB_S + 8 * 8)(x13)
ld s9, (PCB_S + 9 * 8)(x13)
ld s10, (PCB_S + 10 * 8)(x13)
ld s11, (PCB_S + 11 * 8)(x13)
#ifdef FPE
/* Is FPE enabled for new thread? */
ld t0, TD_FRAME(a1)
ld t1, (TF_SSTATUS)(t0)
li t2, SSTATUS_FS_MASK
and t3, t1, t2
beqz t3, 1f /* No, skip. */
/* Restore registers. */
__fpe_state_load x13
1:
#endif
ret
END(cpu_throw)
/*
* void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
*
* a0 = old
* a1 = new
* a2 = mtx
* x3 to x7, x16 and x17 are caller saved
*/
ENTRY(cpu_switch)
/* Store the new curthread */
sd a1, PC_CURTHREAD(gp)
/* And the new pcb */
ld x13, TD_PCB(a1)
sd x13, PC_CURPCB(gp)
/* Save the old context. */
ld x13, TD_PCB(a0)
/* Store ra, sp and the callee-saved registers */
sd ra, (PCB_RA)(x13)
sd sp, (PCB_SP)(x13)
sd tp, (PCB_TP)(x13)
/* s[0-11] */
sd s0, (PCB_S + 0 * 8)(x13)
sd s1, (PCB_S + 1 * 8)(x13)
sd s2, (PCB_S + 2 * 8)(x13)
sd s3, (PCB_S + 3 * 8)(x13)
sd s4, (PCB_S + 4 * 8)(x13)
sd s5, (PCB_S + 5 * 8)(x13)
sd s6, (PCB_S + 6 * 8)(x13)
sd s7, (PCB_S + 7 * 8)(x13)
sd s8, (PCB_S + 8 * 8)(x13)
sd s9, (PCB_S + 9 * 8)(x13)
sd s10, (PCB_S + 10 * 8)(x13)
sd s11, (PCB_S + 11 * 8)(x13)
#ifdef FPE
/*
* Is FPE enabled and is it in dirty state
* for the old thread?
*/
ld t0, TD_FRAME(a0)
ld t1, (TF_SSTATUS)(t0)
li t2, SSTATUS_FS_MASK
and t3, t1, t2
li t2, SSTATUS_FS_DIRTY
bne t3, t2, 1f /* No, skip. */
/* Yes, mark FPE state clean and save registers. */
li t2, ~SSTATUS_FS_MASK
and t3, t1, t2
li t2, SSTATUS_FS_CLEAN
or t3, t3, t2
sd t3, (TF_SSTATUS)(t0)
__fpe_state_save x13
1:
#endif
/*
* Restore the saved context.
*/
ld x13, TD_PCB(a1)
/*
* TODO: We may need to flush the cache here if switching
* to a user process.
*/
sfence.vma
/* Switch to the new pmap */
ld t0, PCB_L1ADDR(x13)
srli t0, t0, PAGE_SHIFT
li t1, SATP_MODE_SV39
or t0, t0, t1
- csrw sptbr, t0
+ csrw satp, t0
/* TODO: Invalidate the TLB */
sfence.vma
/* Release the old thread */
sd a2, TD_LOCK(a0)
#if defined(SCHED_ULE) && defined(SMP)
/* Spin if TD_LOCK points to a blocked_lock */
la a2, _C_LABEL(blocked_lock)
1:
ld t0, TD_LOCK(a1)
beq t0, a2, 1b
#endif
/* Restore the registers */
ld tp, (PCB_TP)(x13)
ld ra, (PCB_RA)(x13)
ld sp, (PCB_SP)(x13)
/* s[0-11] */
ld s0, (PCB_S + 0 * 8)(x13)
ld s1, (PCB_S + 1 * 8)(x13)
ld s2, (PCB_S + 2 * 8)(x13)
ld s3, (PCB_S + 3 * 8)(x13)
ld s4, (PCB_S + 4 * 8)(x13)
ld s5, (PCB_S + 5 * 8)(x13)
ld s6, (PCB_S + 6 * 8)(x13)
ld s7, (PCB_S + 7 * 8)(x13)
ld s8, (PCB_S + 8 * 8)(x13)
ld s9, (PCB_S + 9 * 8)(x13)
ld s10, (PCB_S + 10 * 8)(x13)
ld s11, (PCB_S + 11 * 8)(x13)
#ifdef FPE
/* Is FPE enabled for new thread? */
ld t0, TD_FRAME(a1)
ld t1, (TF_SSTATUS)(t0)
li t2, SSTATUS_FS_MASK
and t3, t1, t2
beqz t3, 1f /* No, skip. */
/* Restore registers. */
__fpe_state_load x13
1:
#endif
ret
.Lcpu_switch_panic_str:
.asciz "cpu_switch: %p\0"
END(cpu_switch)
/*
* fork_exit(void (*callout)(void *, struct trapframe *), void *arg,
* struct trapframe *frame)
*/
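/*
 * The new thread starts here: s0 holds the callout function, s1 its
 * argument, and sp points at the trap frame passed to fork_exit().
 */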
ENTRY(fork_trampoline)
mv a0, s0
mv a1, s1
mv a2, sp
call _C_LABEL(fork_exit)
/* Restore sstatus */
ld t0, (TF_SSTATUS)(sp)
/* Ensure interrupts disabled */
li t1, ~SSTATUS_SIE
and t0, t0, t1
csrw sstatus, t0
/* Restore exception program counter */
ld t0, (TF_SEPC)(sp)
csrw sepc, t0
/* Restore the registers */
ld t0, (TF_T + 0 * 8)(sp)
ld t1, (TF_T + 1 * 8)(sp)
ld t2, (TF_T + 2 * 8)(sp)
ld t3, (TF_T + 3 * 8)(sp)
ld t4, (TF_T + 4 * 8)(sp)
ld t5, (TF_T + 5 * 8)(sp)
ld t6, (TF_T + 6 * 8)(sp)
ld s0, (TF_S + 0 * 8)(sp)
ld s1, (TF_S + 1 * 8)(sp)
ld s2, (TF_S + 2 * 8)(sp)
ld s3, (TF_S + 3 * 8)(sp)
ld s4, (TF_S + 4 * 8)(sp)
ld s5, (TF_S + 5 * 8)(sp)
ld s6, (TF_S + 6 * 8)(sp)
ld s7, (TF_S + 7 * 8)(sp)
ld s8, (TF_S + 8 * 8)(sp)
ld s9, (TF_S + 9 * 8)(sp)
ld s10, (TF_S + 10 * 8)(sp)
ld s11, (TF_S + 11 * 8)(sp)
ld a0, (TF_A + 0 * 8)(sp)
ld a1, (TF_A + 1 * 8)(sp)
ld a2, (TF_A + 2 * 8)(sp)
ld a3, (TF_A + 3 * 8)(sp)
ld a4, (TF_A + 4 * 8)(sp)
ld a5, (TF_A + 5 * 8)(sp)
ld a6, (TF_A + 6 * 8)(sp)
ld a7, (TF_A + 7 * 8)(sp)
/* Load user ra and sp */
ld ra, (TF_RA)(sp)
/*
* Store our pcpup on stack, we will load it back
* on kernel mode trap.
*/
sd gp, (TF_SIZE)(sp)
ld gp, (TF_GP)(sp)
/* Save kernel stack so we can use it doing a user trap */
addi sp, sp, TF_SIZE
csrw sscratch, sp
/* Load user stack */
ld sp, (TF_SP - TF_SIZE)(sp)
sret
END(fork_trampoline)
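/*
 * savectx(struct pcb *pcb)
 *
 * Save the current callee-saved register state (and, with FPE configured,
 * the floating-point state) into the given pcb.
 */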
ENTRY(savectx)
/* Store ra, sp and the callee-saved registers */
sd ra, (PCB_RA)(a0)
sd sp, (PCB_SP)(a0)
sd tp, (PCB_TP)(a0)
/* s[0-11] */
sd s0, (PCB_S + 0 * 8)(a0)
sd s1, (PCB_S + 1 * 8)(a0)
sd s2, (PCB_S + 2 * 8)(a0)
sd s3, (PCB_S + 3 * 8)(a0)
sd s4, (PCB_S + 4 * 8)(a0)
sd s5, (PCB_S + 5 * 8)(a0)
sd s6, (PCB_S + 6 * 8)(a0)
sd s7, (PCB_S + 7 * 8)(a0)
sd s8, (PCB_S + 8 * 8)(a0)
sd s9, (PCB_S + 9 * 8)(a0)
sd s10, (PCB_S + 10 * 8)(a0)
sd s11, (PCB_S + 11 * 8)(a0)
#ifdef FPE
__fpe_state_save a0
#endif
ret
END(savectx)