Index: head/sys/dev/xilinx/axidma.c =================================================================== --- head/sys/dev/xilinx/axidma.c (revision 365454) +++ head/sys/dev/xilinx/axidma.c (revision 365455) @@ -1,649 +1,649 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 Ruslan Bukin * * This software was developed by SRI International and the University of * Cambridge Computer Laboratory (Department of Computer Science and * Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the * DARPA SSITH research programme. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Xilinx AXI DMA controller driver. */ #include __FBSDID("$FreeBSD$"); #include "opt_platform.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include #include #include "xdma_if.h" #define READ4(_sc, _reg) \ bus_space_read_4(_sc->bst, _sc->bsh, _reg) #define WRITE4(_sc, _reg, _val) \ bus_space_write_4(_sc->bst, _sc->bsh, _reg, _val) #define READ8(_sc, _reg) \ bus_space_read_8(_sc->bst, _sc->bsh, _reg) #define WRITE8(_sc, _reg, _val) \ bus_space_write_8(_sc->bst, _sc->bsh, _reg, _val) #define AXIDMA_DEBUG #undef AXIDMA_DEBUG #ifdef AXIDMA_DEBUG #define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) #else #define dprintf(fmt, ...) #endif extern struct bus_space memmap_bus; struct axidma_channel { struct axidma_softc *sc; xdma_channel_t *xchan; bool used; int idx_head; int idx_tail; struct axidma_desc **descs; vm_paddr_t *descs_phys; uint32_t descs_num; vm_size_t mem_size; vm_offset_t mem_paddr; vm_offset_t mem_vaddr; uint32_t descs_used_count; }; struct axidma_softc { device_t dev; struct resource *res[3]; bus_space_tag_t bst; bus_space_handle_t bsh; void *ih[2]; struct axidma_desc desc; struct axidma_channel channels[AXIDMA_NCHANNELS]; }; static struct resource_spec axidma_spec[] = { { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { SYS_RES_IRQ, 1, RF_ACTIVE }, { -1, 0 } }; #define HWTYPE_NONE 0 #define HWTYPE_STD 1 static struct ofw_compat_data compat_data[] = { { "xlnx,eth-dma", HWTYPE_STD }, { NULL, HWTYPE_NONE }, }; static int axidma_probe(device_t dev); static int axidma_attach(device_t dev); static int axidma_detach(device_t dev); static inline uint32_t axidma_next_desc(struct axidma_channel *chan, uint32_t curidx) { return ((curidx + 1) % chan->descs_num); } static void axidma_intr(struct axidma_softc *sc, struct axidma_channel *chan) { xdma_transfer_status_t status; xdma_transfer_status_t st; struct axidma_fdt_data *data; xdma_controller_t *xdma; struct axidma_desc *desc; struct xdma_channel *xchan; uint32_t tot_copied; int pending; int errors; xchan = chan->xchan; xdma = xchan->xdma; data = xdma->data; pending = READ4(sc, AXI_DMASR(data->id)); WRITE4(sc, AXI_DMASR(data->id), pending); errors = (pending & (DMASR_DMAINTERR | DMASR_DMASLVERR | DMASR_DMADECOREERR | DMASR_SGINTERR | DMASR_SGSLVERR | DMASR_SGDECERR)); dprintf("%s: AXI_DMASR %x\n", __func__, READ4(sc, AXI_DMASR(data->id))); dprintf("%s: AXI_CURDESC %x\n", __func__, READ4(sc, AXI_CURDESC(data->id))); dprintf("%s: AXI_TAILDESC %x\n", __func__, READ4(sc, AXI_TAILDESC(data->id))); tot_copied = 0; while (chan->idx_tail != chan->idx_head) { desc = chan->descs[chan->idx_tail]; if ((desc->status & BD_STATUS_CMPLT) == 0) break; st.error = errors; st.transferred = desc->status & BD_CONTROL_LEN_M; tot_copied += st.transferred; xchan_seg_done(xchan, &st); chan->idx_tail = axidma_next_desc(chan, chan->idx_tail); atomic_subtract_int(&chan->descs_used_count, 1); } /* Finish operation */ status.error = errors; status.transferred = tot_copied; xdma_callback(chan->xchan, &status); } static void axidma_intr_rx(void *arg) { struct axidma_softc *sc; struct axidma_channel *chan; dprintf("%s\n", __func__); sc = arg; chan = &sc->channels[AXIDMA_RX_CHAN]; axidma_intr(sc, chan); } static void axidma_intr_tx(void *arg) { struct axidma_softc *sc; struct axidma_channel *chan; dprintf("%s\n", __func__); sc = arg; chan = &sc->channels[AXIDMA_TX_CHAN]; axidma_intr(sc, chan); } static int axidma_reset(struct axidma_softc *sc, int chan_id) { int timeout; WRITE4(sc, AXI_DMACR(chan_id), DMACR_RESET); timeout = 100; do { if ((READ4(sc, AXI_DMACR(chan_id)) & DMACR_RESET) == 0) break; } while (timeout--); dprintf("timeout %d\n", timeout); if (timeout == 0) return (-1); dprintf("%s: read control after reset: %x\n", __func__, READ4(sc, AXI_DMACR(chan_id))); return (0); } static int axidma_probe(device_t dev) { int hwtype; if (!ofw_bus_status_okay(dev)) return (ENXIO); hwtype = ofw_bus_search_compatible(dev, compat_data)->ocd_data; if (hwtype == HWTYPE_NONE) return (ENXIO); device_set_desc(dev, "Xilinx AXI DMA"); return (BUS_PROBE_DEFAULT); } static int axidma_attach(device_t dev) { struct axidma_softc *sc; phandle_t xref, node; int err; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, axidma_spec, sc->res)) { device_printf(dev, "could not allocate resources.\n"); return (ENXIO); } /* CSR memory interface */ sc->bst = rman_get_bustag(sc->res[0]); sc->bsh = rman_get_bushandle(sc->res[0]); /* Setup interrupt handler */ err = bus_setup_intr(dev, sc->res[1], INTR_TYPE_MISC | INTR_MPSAFE, NULL, axidma_intr_tx, sc, &sc->ih[0]); if (err) { device_printf(dev, "Unable to alloc interrupt resource.\n"); return (ENXIO); } /* Setup interrupt handler */ err = bus_setup_intr(dev, sc->res[2], INTR_TYPE_MISC | INTR_MPSAFE, NULL, axidma_intr_rx, sc, &sc->ih[1]); if (err) { device_printf(dev, "Unable to alloc interrupt resource.\n"); return (ENXIO); } node = ofw_bus_get_node(dev); xref = OF_xref_from_node(node); OF_device_register_xref(xref, dev); return (0); } static int axidma_detach(device_t dev) { struct axidma_softc *sc; sc = device_get_softc(dev); bus_teardown_intr(dev, sc->res[1], sc->ih[0]); bus_teardown_intr(dev, sc->res[2], sc->ih[1]); bus_release_resources(dev, axidma_spec, sc->res); return (0); } static int axidma_desc_free(struct axidma_softc *sc, struct axidma_channel *chan) { struct xdma_channel *xchan; int nsegments; nsegments = chan->descs_num; xchan = chan->xchan; free(chan->descs, M_DEVBUF); free(chan->descs_phys, M_DEVBUF); pmap_kremove_device(chan->mem_vaddr, chan->mem_size); kva_free(chan->mem_vaddr, chan->mem_size); vmem_free(xchan->vmem, chan->mem_paddr, chan->mem_size); return (0); } static int axidma_desc_alloc(struct axidma_softc *sc, struct xdma_channel *xchan, uint32_t desc_size) { struct axidma_channel *chan; int nsegments; int i; chan = (struct axidma_channel *)xchan->chan; nsegments = chan->descs_num; chan->descs = malloc(nsegments * sizeof(struct axidma_desc *), M_DEVBUF, M_NOWAIT | M_ZERO); if (chan->descs == NULL) { device_printf(sc->dev, "%s: Can't allocate memory.\n", __func__); return (-1); } chan->descs_phys = malloc(nsegments * sizeof(bus_dma_segment_t), M_DEVBUF, M_NOWAIT | M_ZERO); chan->mem_size = desc_size * nsegments; if (vmem_alloc(xchan->vmem, chan->mem_size, M_FIRSTFIT | M_NOWAIT, &chan->mem_paddr)) { device_printf(sc->dev, "Failed to allocate memory.\n"); return (-1); } chan->mem_vaddr = kva_alloc(chan->mem_size); pmap_kenter_device(chan->mem_vaddr, chan->mem_size, chan->mem_paddr); - device_printf(sc->dev, "Allocated chunk %lx %d\n", + device_printf(sc->dev, "Allocated chunk %lx %lu\n", chan->mem_paddr, chan->mem_size); for (i = 0; i < nsegments; i++) { chan->descs[i] = (struct axidma_desc *) ((uint64_t)chan->mem_vaddr + desc_size * i); chan->descs_phys[i] = chan->mem_paddr + desc_size * i; } return (0); } static int axidma_channel_alloc(device_t dev, struct xdma_channel *xchan) { xdma_controller_t *xdma; struct axidma_fdt_data *data; struct axidma_channel *chan; struct axidma_softc *sc; sc = device_get_softc(dev); if (xchan->caps & XCHAN_CAP_BUSDMA) { device_printf(sc->dev, "Error: busdma operation is not implemented."); return (-1); } xdma = xchan->xdma; data = xdma->data; chan = &sc->channels[data->id]; if (chan->used == false) { if (axidma_reset(sc, data->id) != 0) return (-1); chan->xchan = xchan; xchan->caps |= XCHAN_CAP_BOUNCE; xchan->chan = (void *)chan; chan->sc = sc; chan->used = true; chan->idx_head = 0; chan->idx_tail = 0; chan->descs_used_count = 0; chan->descs_num = AXIDMA_DESCS_NUM; return (0); } return (-1); } static int axidma_channel_free(device_t dev, struct xdma_channel *xchan) { struct axidma_channel *chan; struct axidma_softc *sc; sc = device_get_softc(dev); chan = (struct axidma_channel *)xchan->chan; axidma_desc_free(sc, chan); chan->used = false; return (0); } static int axidma_channel_capacity(device_t dev, xdma_channel_t *xchan, uint32_t *capacity) { struct axidma_channel *chan; uint32_t c; chan = (struct axidma_channel *)xchan->chan; /* At least one descriptor must be left empty. */ c = (chan->descs_num - chan->descs_used_count - 1); *capacity = c; return (0); } static int axidma_channel_submit_sg(device_t dev, struct xdma_channel *xchan, struct xdma_sglist *sg, uint32_t sg_n) { xdma_controller_t *xdma; struct axidma_fdt_data *data; struct axidma_channel *chan; struct axidma_desc *desc; struct axidma_softc *sc; uint32_t src_addr; uint32_t dst_addr; uint32_t addr; uint32_t len; uint32_t tmp; int i; int tail; dprintf("%s: sg_n %d\n", __func__, sg_n); sc = device_get_softc(dev); chan = (struct axidma_channel *)xchan->chan; xdma = xchan->xdma; data = xdma->data; if (sg_n == 0) return (0); tail = chan->idx_head; tmp = 0; for (i = 0; i < sg_n; i++) { src_addr = (uint32_t)sg[i].src_addr; dst_addr = (uint32_t)sg[i].dst_addr; len = (uint32_t)sg[i].len; dprintf("%s(%d): src %x dst %x len %d\n", __func__, data->id, src_addr, dst_addr, len); desc = chan->descs[chan->idx_head]; if (sg[i].direction == XDMA_MEM_TO_DEV) desc->phys = src_addr; else desc->phys = dst_addr; desc->status = 0; desc->control = len; if (sg[i].first == 1) desc->control |= BD_CONTROL_TXSOF; if (sg[i].last == 1) desc->control |= BD_CONTROL_TXEOF; tmp = chan->idx_head; atomic_add_int(&chan->descs_used_count, 1); chan->idx_head = axidma_next_desc(chan, chan->idx_head); } dprintf("%s(%d): _curdesc %x\n", __func__, data->id, READ8(sc, AXI_CURDESC(data->id))); dprintf("%s(%d): _curdesc %x\n", __func__, data->id, READ8(sc, AXI_CURDESC(data->id))); dprintf("%s(%d): status %x\n", __func__, data->id, READ4(sc, AXI_DMASR(data->id))); addr = chan->descs_phys[tmp]; WRITE8(sc, AXI_TAILDESC(data->id), addr); return (0); } static int axidma_channel_prep_sg(device_t dev, struct xdma_channel *xchan) { xdma_controller_t *xdma; struct axidma_fdt_data *data; struct axidma_channel *chan; struct axidma_desc *desc; struct axidma_softc *sc; uint32_t addr; uint32_t reg; int ret; int i; sc = device_get_softc(dev); chan = (struct axidma_channel *)xchan->chan; xdma = xchan->xdma; data = xdma->data; dprintf("%s(%d)\n", __func__, data->id); ret = axidma_desc_alloc(sc, xchan, sizeof(struct axidma_desc)); if (ret != 0) { device_printf(sc->dev, "%s: Can't allocate descriptors.\n", __func__); return (-1); } for (i = 0; i < chan->descs_num; i++) { desc = chan->descs[i]; bzero(desc, sizeof(struct axidma_desc)); if (i == (chan->descs_num - 1)) desc->next = chan->descs_phys[0]; else desc->next = chan->descs_phys[i + 1]; desc->status = 0; desc->control = 0; dprintf("%s(%d): desc %d vaddr %lx next paddr %x\n", __func__, data->id, i, (uint64_t)desc, le32toh(desc->next)); } addr = chan->descs_phys[0]; WRITE8(sc, AXI_CURDESC(data->id), addr); reg = READ4(sc, AXI_DMACR(data->id)); reg |= DMACR_IOC_IRQEN | DMACR_DLY_IRQEN | DMACR_ERR_IRQEN; WRITE4(sc, AXI_DMACR(data->id), reg); reg |= DMACR_RS; WRITE4(sc, AXI_DMACR(data->id), reg); return (0); } static int axidma_channel_control(device_t dev, xdma_channel_t *xchan, int cmd) { struct axidma_channel *chan; struct axidma_softc *sc; sc = device_get_softc(dev); chan = (struct axidma_channel *)xchan->chan; switch (cmd) { case XDMA_CMD_BEGIN: case XDMA_CMD_TERMINATE: case XDMA_CMD_PAUSE: /* TODO: implement me */ return (-1); } return (0); } #ifdef FDT static int axidma_ofw_md_data(device_t dev, pcell_t *cells, int ncells, void **ptr) { struct axidma_fdt_data *data; if (ncells != 1) return (-1); data = malloc(sizeof(struct axidma_fdt_data), M_DEVBUF, (M_WAITOK | M_ZERO)); data->id = cells[0]; *ptr = data; return (0); } #endif static device_method_t axidma_methods[] = { /* Device interface */ DEVMETHOD(device_probe, axidma_probe), DEVMETHOD(device_attach, axidma_attach), DEVMETHOD(device_detach, axidma_detach), /* xDMA Interface */ DEVMETHOD(xdma_channel_alloc, axidma_channel_alloc), DEVMETHOD(xdma_channel_free, axidma_channel_free), DEVMETHOD(xdma_channel_control, axidma_channel_control), /* xDMA SG Interface */ DEVMETHOD(xdma_channel_capacity, axidma_channel_capacity), DEVMETHOD(xdma_channel_prep_sg, axidma_channel_prep_sg), DEVMETHOD(xdma_channel_submit_sg, axidma_channel_submit_sg), #ifdef FDT DEVMETHOD(xdma_ofw_md_data, axidma_ofw_md_data), #endif DEVMETHOD_END }; static driver_t axidma_driver = { "axidma", axidma_methods, sizeof(struct axidma_softc), }; static devclass_t axidma_devclass; EARLY_DRIVER_MODULE(axidma, simplebus, axidma_driver, axidma_devclass, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_LATE); Index: head/sys/riscv/riscv/db_disasm.c =================================================================== --- head/sys/riscv/riscv/db_disasm.c (revision 365454) +++ head/sys/riscv/riscv/db_disasm.c (revision 365455) @@ -1,603 +1,603 @@ /*- * Copyright (c) 2016-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #define X_RA 1 #define X_SP 2 #define X_GP 3 #define X_TP 4 #define X_T0 5 #define X_T1 6 #define X_T2 7 #define X_T3 28 #define RD_SHIFT 7 #define RD_MASK (0x1f << RD_SHIFT) #define RS1_SHIFT 15 #define RS1_MASK (0x1f << RS1_SHIFT) #define RS2_SHIFT 20 #define RS2_MASK (0x1f << RS2_SHIFT) #define IMM_SHIFT 20 #define IMM_MASK (0xfff << IMM_SHIFT) static char *reg_name[32] = { "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6" }; static char *fp_reg_name[32] = { "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7", "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5", "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7", "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11" }; struct riscv_op { char *name; char *fmt; int match; int mask; int (*match_func)(struct riscv_op *op, uint32_t insn); }; static int m_op(struct riscv_op *op, uint32_t insn) { if (((insn ^ op->match) & op->mask) == 0) return (1); return (0); } static struct riscv_op riscv_opcodes[] = { /* Aliases first */ {"ret","", MATCH_JALR | (X_RA << RS1_SHIFT), MASK_JALR | RD_MASK | RS1_MASK | IMM_MASK, m_op }, { "beq", "s,t,p", MATCH_BEQ, MASK_BEQ, m_op }, { "bne", "s,t,p", MATCH_BNE, MASK_BNE, m_op }, { "blt", "s,t,p", MATCH_BLT, MASK_BLT, m_op }, { "bge", "s,t,p", MATCH_BGE, MASK_BGE, m_op }, { "bltu", "s,t,p", MATCH_BLTU, MASK_BLTU, m_op }, { "bgeu", "s,t,p", MATCH_BGEU, MASK_BGEU, m_op }, { "jalr", "d,o(s)", MATCH_JALR, MASK_JALR, m_op }, { "jal", "d,a", MATCH_JAL, MASK_JAL, m_op }, { "lui", "d,u", MATCH_LUI, MASK_LUI, m_op }, { "auipc", "d,u", MATCH_AUIPC, MASK_AUIPC, m_op }, { "addi", "d,s,j", MATCH_ADDI, MASK_ADDI, m_op }, { "slli", "d,s,>", MATCH_SLLI, MASK_SLLI, m_op }, { "slti", "d,s,j", MATCH_SLTI, MASK_SLTI, m_op }, { "sltiu", "d,s,j", MATCH_SLTIU, MASK_SLTIU, m_op }, { "xori", "d,s,j", MATCH_XORI, MASK_XORI, m_op }, { "srli", "d,s,>", MATCH_SRLI, MASK_SRLI, m_op }, { "srai", "d,s,>", MATCH_SRAI, MASK_SRAI, m_op }, { "ori", "d,s,j", MATCH_ORI, MASK_ORI, m_op }, { "andi", "d,s,j", MATCH_ANDI, MASK_ANDI, m_op }, { "add", "d,s,t", MATCH_ADD, MASK_ADD, m_op }, { "sub", "d,s,t", MATCH_SUB, MASK_SUB, m_op }, { "sll", "d,s,t", MATCH_SLL, MASK_SLL, m_op }, { "slt", "d,s,t", MATCH_SLT, MASK_SLT, m_op }, { "sltu", "d,s,t", MATCH_SLTU, MASK_SLTU, m_op }, { "xor", "d,s,t", MATCH_XOR, MASK_XOR, m_op }, { "srl", "d,s,t", MATCH_SRL, MASK_SRL, m_op }, { "sra", "d,s,t", MATCH_SRA, MASK_SRA, m_op }, { "or", "d,s,t", MATCH_OR, MASK_OR, m_op }, { "and", "d,s,t", MATCH_AND, MASK_AND, m_op }, { "addiw", "d,s,j", MATCH_ADDIW, MASK_ADDIW, m_op }, { "slliw", "d,s,<", MATCH_SLLIW, MASK_SLLIW, m_op }, { "srliw", "d,s,<", MATCH_SRLIW, MASK_SRLIW, m_op }, { "sraiw", "d,s,<", MATCH_SRAIW, MASK_SRAIW, m_op }, { "addw", "d,s,t", MATCH_ADDW, MASK_ADDW, m_op }, { "subw", "d,s,t", MATCH_SUBW, MASK_SUBW, m_op }, { "sllw", "d,s,t", MATCH_SLLW, MASK_SLLW, m_op }, { "srlw", "d,s,t", MATCH_SRLW, MASK_SRLW, m_op }, { "sraw", "d,s,t", MATCH_SRAW, MASK_SRAW, m_op }, { "lb", "d,o(s)", MATCH_LB, MASK_LB, m_op }, { "lh", "d,o(s)", MATCH_LH, MASK_LH, m_op }, { "lw", "d,o(s)", MATCH_LW, MASK_LW, m_op }, { "ld", "d,o(s)", MATCH_LD, MASK_LD, m_op }, { "lbu", "d,o(s)", MATCH_LBU, MASK_LBU, m_op }, { "lhu", "d,o(s)", MATCH_LHU, MASK_LHU, m_op }, { "lwu", "d,o(s)", MATCH_LWU, MASK_LWU, m_op }, { "sb", "t,q(s)", MATCH_SB, MASK_SB, m_op }, { "sh", "t,q(s)", MATCH_SH, MASK_SH, m_op }, { "sw", "t,q(s)", MATCH_SW, MASK_SW, m_op }, { "sd", "t,q(s)", MATCH_SD, MASK_SD, m_op }, { "fence", "P,Q", MATCH_FENCE, MASK_FENCE, m_op }, { "fence.i", "", MATCH_FENCE_I, MASK_FENCE_I, m_op }, { "mul", "d,s,t", MATCH_MUL, MASK_MUL, m_op }, { "mulh", "d,s,t", MATCH_MULH, MASK_MULH, m_op }, { "mulhsu", "d,s,t", MATCH_MULHSU, MASK_MULHSU, m_op }, { "mulhu", "d,s,t", MATCH_MULHU, MASK_MULHU, m_op }, { "div", "d,s,t", MATCH_DIV, MASK_DIV, m_op }, { "divu", "d,s,t", MATCH_DIVU, MASK_DIVU, m_op }, { "rem", "d,s,t", MATCH_REM, MASK_REM, m_op }, { "remu", "d,s,t", MATCH_REMU, MASK_REMU, m_op }, { "mulw", "d,s,t", MATCH_MULW, MASK_MULW, m_op }, { "divw", "d,s,t", MATCH_DIVW, MASK_DIVW, m_op }, { "divuw", "d,s,t", MATCH_DIVUW, MASK_DIVUW, m_op }, { "remw", "d,s,t", MATCH_REMW, MASK_REMW, m_op }, { "remuw", "d,s,t", MATCH_REMUW, MASK_REMUW, m_op }, { "amoadd.w", "d,t,0(s)", MATCH_AMOADD_W, MASK_AMOADD_W, m_op }, { "amoxor.w", "d,t,0(s)", MATCH_AMOXOR_W, MASK_AMOXOR_W, m_op }, { "amoor.w", "d,t,0(s)", MATCH_AMOOR_W, MASK_AMOOR_W, m_op }, { "amoand.w", "d,t,0(s)", MATCH_AMOAND_W, MASK_AMOAND_W, m_op }, { "amomin.w", "d,t,0(s)", MATCH_AMOMIN_W, MASK_AMOMIN_W, m_op }, { "amomax.w", "d,t,0(s)", MATCH_AMOMAX_W, MASK_AMOMAX_W, m_op }, { "amominu.w", "d,t,0(s)", MATCH_AMOMINU_W, MASK_AMOMINU_W,m_op }, { "amomaxu.w", "d,t,0(s)", MATCH_AMOMAXU_W, MASK_AMOMAXU_W,m_op }, { "amoswap.w", "d,t,0(s)", MATCH_AMOSWAP_W, MASK_AMOSWAP_W,m_op }, { "lr.w", "d,0(s)", MATCH_LR_W, MASK_LR_W, m_op }, { "sc.w", "d,t,0(s)", MATCH_SC_W, MASK_SC_W, m_op }, { "amoadd.d", "d,t,0(s)", MATCH_AMOADD_D, MASK_AMOADD_D, m_op }, { "amoxor.d", "d,t,0(s)", MATCH_AMOXOR_D, MASK_AMOXOR_D, m_op }, { "amoor.d", "d,t,0(s)", MATCH_AMOOR_D, MASK_AMOOR_D, m_op }, { "amoand.d", "d,t,0(s)", MATCH_AMOAND_D, MASK_AMOAND_D, m_op }, { "amomin.d", "d,t,0(s)", MATCH_AMOMIN_D, MASK_AMOMIN_D, m_op }, { "amomax.d", "d,t,0(s)", MATCH_AMOMAX_D, MASK_AMOMAX_D, m_op }, { "amominu.d", "d,t,0(s)", MATCH_AMOMINU_D, MASK_AMOMINU_D,m_op }, { "amomaxu.d", "d,t,0(s)", MATCH_AMOMAXU_D, MASK_AMOMAXU_D,m_op }, { "amoswap.d", "d,t,0(s)", MATCH_AMOSWAP_D, MASK_AMOSWAP_D,m_op }, { "lr.d", "d,0(s)", MATCH_LR_D, MASK_LR_D, m_op }, { "sc.d", "d,t,0(s)", MATCH_SC_D, MASK_SC_D, m_op }, { "ecall", "", MATCH_ECALL, MASK_ECALL, m_op }, { "ebreak", "", MATCH_EBREAK, MASK_EBREAK, m_op }, { "uret", "", MATCH_URET, MASK_URET, m_op }, { "sret", "", MATCH_SRET, MASK_SRET, m_op }, { "mret", "", MATCH_MRET, MASK_MRET, m_op }, { "dret", "", MATCH_DRET, MASK_DRET, m_op }, { "sfence.vma", "", MATCH_SFENCE_VMA, MASK_SFENCE_VMA, m_op }, { "wfi", "", MATCH_WFI, MASK_WFI, m_op }, { "csrrw", "d,E,s", MATCH_CSRRW, MASK_CSRRW, m_op }, { "csrrs", "d,E,s", MATCH_CSRRS, MASK_CSRRS, m_op }, { "csrrc", "d,E,s", MATCH_CSRRC, MASK_CSRRC, m_op }, { "csrrwi", "d,E,Z", MATCH_CSRRWI, MASK_CSRRWI, m_op }, { "csrrsi", "d,E,Z", MATCH_CSRRSI, MASK_CSRRSI, m_op }, { "csrrci", "d,E,Z", MATCH_CSRRCI, MASK_CSRRCI, m_op }, { "fadd.s", "D,S,T", MATCH_FADD_S, MASK_FADD_S, m_op }, { "fsub.s", "D,S,T", MATCH_FSUB_S, MASK_FSUB_S, m_op }, { "fmul.s", "D,S,T", MATCH_FMUL_S, MASK_FMUL_S, m_op }, { "fdiv.s", "D,S,T", MATCH_FDIV_S, MASK_FDIV_S, m_op }, { "fsgnj.s", "D,S,T", MATCH_FSGNJ_S, MASK_FSGNJ_S, m_op }, { "fsgnjn.s", "D,S,T", MATCH_FSGNJN_S, MASK_FSGNJN_S, m_op }, { "fsgnjx.s", "D,S,T", MATCH_FSGNJX_S, MASK_FSGNJX_S, m_op }, { "fmin.s", "D,S,T", MATCH_FMIN_S, MASK_FMIN_S, m_op }, { "fmax.s", "D,S,T", MATCH_FMAX_S, MASK_FMAX_S, m_op }, { "fsqrt.s", "D,S", MATCH_FSQRT_S, MASK_FSQRT_S, m_op }, { "fadd.d", "D,S,T", MATCH_FADD_D, MASK_FADD_D, m_op }, { "fsub.d", "D,S,T", MATCH_FSUB_D, MASK_FSUB_D, m_op }, { "fmul.d", "D,S,T", MATCH_FMUL_D, MASK_FMUL_D, m_op }, { "fdiv.d", "D,S,T", MATCH_FDIV_D, MASK_FDIV_D, m_op }, { "fsgnj.d", "D,S,T", MATCH_FSGNJ_D, MASK_FSGNJ_D, m_op }, { "fsgnjn.d", "D,S,T", MATCH_FSGNJN_D, MASK_FSGNJN_D, m_op }, { "fsgnjx.d", "D,S,T", MATCH_FSGNJX_D, MASK_FSGNJX_D, m_op }, { "fmin.d", "D,S,T", MATCH_FMIN_D, MASK_FMIN_D, m_op }, { "fmax.d", "D,S,T", MATCH_FMAX_D, MASK_FMAX_D, m_op }, { "fcvt.s.d", "D,S", MATCH_FCVT_S_D, MASK_FCVT_S_D, m_op }, { "fcvt.d.s", "D,S", MATCH_FCVT_D_S, MASK_FCVT_D_S, m_op }, { "fsqrt.d", "D,S", MATCH_FSQRT_D, MASK_FSQRT_D, m_op }, { "fadd.q", "D,S,T", MATCH_FADD_Q, MASK_FADD_Q, m_op }, { "fsub.q", "D,S,T", MATCH_FSUB_Q, MASK_FSUB_Q, m_op }, { "fmul.q", "D,S,T", MATCH_FMUL_Q, MASK_FMUL_Q, m_op }, { "fdiv.q", "D,S,T", MATCH_FDIV_Q, MASK_FDIV_Q, m_op }, { "fsgnj.q", "D,S,T", MATCH_FSGNJ_Q, MASK_FSGNJ_Q, m_op }, { "fsgnjn.q", "D,S,T", MATCH_FSGNJN_Q, MASK_FSGNJN_Q, m_op }, { "fsgnjx.q", "D,S,T", MATCH_FSGNJX_Q, MASK_FSGNJX_Q, m_op }, { "fmin.q", "D,S,T", MATCH_FMIN_Q, MASK_FMIN_Q, m_op }, { "fmax.q", "D,S,T", MATCH_FMAX_Q, MASK_FMAX_Q, m_op }, { "fcvt.s.q", "D,S", MATCH_FCVT_S_Q, MASK_FCVT_S_Q, m_op }, { "fcvt.q.s", "D,S", MATCH_FCVT_Q_S, MASK_FCVT_Q_S, m_op }, { "fcvt.d.q", "D,S", MATCH_FCVT_D_Q, MASK_FCVT_D_Q, m_op }, { "fcvt.q.d", "D,S", MATCH_FCVT_Q_D, MASK_FCVT_Q_D, m_op }, { "fsqrt.q", "D,S", MATCH_FSQRT_Q, MASK_FSQRT_Q, m_op }, { "fle.s", "d,S,T", MATCH_FLE_S, MASK_FLE_S, m_op }, { "flt.s", "d,S,T", MATCH_FLT_S, MASK_FLT_S, m_op }, { "feq.s", "d,S,T", MATCH_FEQ_S, MASK_FEQ_S, m_op }, { "fle.d", "d,S,T", MATCH_FLE_D, MASK_FLE_D, m_op }, { "flt.d", "d,S,T", MATCH_FLT_D, MASK_FLT_D, m_op }, { "feq.d", "d,S,T", MATCH_FEQ_D, MASK_FEQ_D, m_op }, { "fle.q", "d,S,T", MATCH_FLE_Q, MASK_FLE_Q, m_op }, { "flt.q", "d,S,T", MATCH_FLT_Q, MASK_FLT_Q, m_op }, { "feq.q", "d,S,T", MATCH_FEQ_Q, MASK_FEQ_Q, m_op }, { "fcvt.w.s", "d,S", MATCH_FCVT_W_S, MASK_FCVT_W_S, m_op }, { "fcvt.wu.s", "d,S", MATCH_FCVT_WU_S, MASK_FCVT_WU_S,m_op }, { "fcvt.l.s", "d,S", MATCH_FCVT_L_S, MASK_FCVT_L_S, m_op }, { "fcvt.lu.s", "d,S", MATCH_FCVT_LU_S, MASK_FCVT_LU_S,m_op }, { "fmv.x.w", "d,S", MATCH_FMV_X_W, MASK_FMV_X_W, m_op }, { "fclass.s", "d,S", MATCH_FCLASS_S, MASK_FCLASS_S, m_op }, { "fcvt.w.d", "d,S", MATCH_FCVT_W_D, MASK_FCVT_W_D, m_op }, { "fcvt.wu.d", "d,S", MATCH_FCVT_WU_D, MASK_FCVT_WU_D,m_op }, { "fcvt.l.d", "d,S", MATCH_FCVT_L_D, MASK_FCVT_L_D, m_op }, { "fcvt.lu.d", "d,S", MATCH_FCVT_LU_D, MASK_FCVT_LU_D,m_op }, { "fmv.x.d", "d,S", MATCH_FMV_X_D, MASK_FMV_X_D, m_op }, { "fclass.d", "d,S", MATCH_FCLASS_D, MASK_FCLASS_D, m_op }, { "fcvt.w.q", "d,S", MATCH_FCVT_W_Q, MASK_FCVT_W_Q, m_op }, { "fcvt.wu.q", "d,S", MATCH_FCVT_WU_Q, MASK_FCVT_WU_Q,m_op }, { "fcvt.l.q", "d,S", MATCH_FCVT_L_Q, MASK_FCVT_L_Q, m_op }, { "fcvt.lu.q", "d,S", MATCH_FCVT_LU_Q, MASK_FCVT_LU_Q,m_op }, { "fmv.x.q", "d,S", MATCH_FMV_X_Q, MASK_FMV_X_Q, m_op }, { "fclass.q", "d,S", MATCH_FCLASS_Q, MASK_FCLASS_Q, m_op }, { "fcvt.s.w", "D,s", MATCH_FCVT_S_W, MASK_FCVT_S_W, m_op }, { "fcvt.s.wu", "D,s", MATCH_FCVT_S_WU, MASK_FCVT_S_WU,m_op }, { "fcvt.s.l", "D,s", MATCH_FCVT_S_L, MASK_FCVT_S_L, m_op }, { "fcvt.s.lu", "D,s", MATCH_FCVT_S_LU, MASK_FCVT_S_LU,m_op }, { "fmv.w.x", "D,s", MATCH_FMV_W_X, MASK_FMV_W_X, m_op }, { "fcvt.d.w", "D,s", MATCH_FCVT_D_W, MASK_FCVT_D_W, m_op }, { "fcvt.d.wu", "D,s", MATCH_FCVT_D_WU, MASK_FCVT_D_WU,m_op }, { "fcvt.d.l", "D,s", MATCH_FCVT_D_L, MASK_FCVT_D_L, m_op }, { "fcvt.d.lu", "D,s", MATCH_FCVT_D_LU, MASK_FCVT_D_LU,m_op }, { "fmv.d.x", "D,s", MATCH_FMV_D_X, MASK_FMV_D_X, m_op }, { "fcvt.q.w", "D,s", MATCH_FCVT_Q_W, MASK_FCVT_Q_W, m_op }, { "fcvt.q.wu", "D,s", MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU,m_op }, { "fcvt.q.l", "D,s", MATCH_FCVT_Q_L, MASK_FCVT_Q_L, m_op }, { "fcvt.q.lu", "D,s", MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU,m_op }, { "fmv.q.x", "D,s", MATCH_FMV_Q_X, MASK_FMV_Q_X, m_op }, { "flw", "D,o(s)", MATCH_FLW, MASK_FLW, m_op }, { "fld", "D,o(s)", MATCH_FLD, MASK_FLD, m_op }, { "flq", "D,o(s)", MATCH_FLQ, MASK_FLQ, m_op }, { "fsw", "T,q(s)", MATCH_FSW, MASK_FSW, m_op }, { "fsd", "T,q(s)", MATCH_FSD, MASK_FSD, m_op }, { "fsq", "T,q(s)", MATCH_FSQ, MASK_FSQ, m_op }, { "fmadd.s", "D,S,T,R", MATCH_FMADD_S, MASK_FMADD_S, m_op }, { "fmsub.s", "D,S,T,R", MATCH_FMSUB_S, MASK_FMSUB_S, m_op }, { "fnmsub.s", "D,S,T,R", MATCH_FNMSUB_S, MASK_FNMSUB_S, m_op }, { "fnmadd.s", "D,S,T,R", MATCH_FNMADD_S, MASK_FNMADD_S, m_op }, { "fmadd.d", "D,S,T,R", MATCH_FMADD_D, MASK_FMADD_D, m_op }, { "fmsub.d", "D,S,T,R", MATCH_FMSUB_D, MASK_FMSUB_D, m_op }, { "fnmsub.d", "D,S,T,R", MATCH_FNMSUB_D, MASK_FNMSUB_D, m_op }, { "fnmadd.d", "D,S,T,R", MATCH_FNMADD_D, MASK_FNMADD_D, m_op }, { "fmadd.q", "D,S,T,R", MATCH_FMADD_Q, MASK_FMADD_Q, m_op }, { "fmsub.q", "D,S,T,R", MATCH_FMSUB_Q, MASK_FMSUB_Q, m_op }, { "fnmsub.q", "D,S,T,R", MATCH_FNMSUB_Q, MASK_FNMSUB_Q, m_op }, { "fnmadd.q", "D,S,T,R", MATCH_FNMADD_Q, MASK_FNMADD_Q, m_op }, { NULL, NULL, 0, 0, NULL }, }; static struct riscv_op riscv_c_opcodes[] = { /* Aliases first */ { "ret","",MATCH_C_JR | (X_RA << RD_SHIFT), MASK_C_JR | RD_MASK, m_op}, /* C-Compressed ISA Extension Instructions */ { "c.nop", "", MATCH_C_NOP, MASK_C_NOP, m_op }, { "c.ebreak", "", MATCH_C_EBREAK, MASK_C_EBREAK, m_op }, { "c.jr", "d", MATCH_C_JR, MASK_C_JR, m_op }, { "c.jalr", "d", MATCH_C_JALR, MASK_C_JALR, m_op }, { "c.jal", "Ca", MATCH_C_JAL, MASK_C_JAL, m_op }, { "c.ld", "Ct,Cl(Cs)", MATCH_C_LD, MASK_C_LD, m_op }, { "c.sd", "Ct,Cl(Cs)", MATCH_C_SD, MASK_C_SD, m_op }, { "c.addiw", "d,Co", MATCH_C_ADDIW, MASK_C_ADDIW, m_op }, { "c.ldsp", "d,Cn(Cc)", MATCH_C_LDSP, MASK_C_LDSP, m_op }, { "c.sdsp", "CV,CN(Cc)", MATCH_C_SDSP, MASK_C_SDSP, m_op }, { "c.addi4spn", "", MATCH_C_ADDI4SPN, MASK_C_ADDI4SPN, m_op }, { "c.addi16sp", "", MATCH_C_ADDI16SP, MASK_C_ADDI16SP, m_op }, { "c.fld", "CD,Cl(Cs)", MATCH_C_FLD, MASK_C_FLD, m_op }, { "c.lw", "Ct,Ck(Cs)", MATCH_C_LW, MASK_C_LW, m_op }, { "c.flw", "CD,Ck(Cs)", MATCH_C_FLW, MASK_C_FLW, m_op }, { "c.fsd", "CD,Cl(Cs)", MATCH_C_FSD, MASK_C_FSD, m_op }, { "c.sw", "Ct,Ck(Cs)", MATCH_C_SW, MASK_C_SW, m_op }, { "c.fsw", "CD,Ck(Cs)", MATCH_C_FSW, MASK_C_FSW, m_op }, { "c.addi", "d,Co", MATCH_C_ADDI, MASK_C_ADDI, m_op }, { "c.li", "d,Co", MATCH_C_LI, MASK_C_LI, m_op }, { "c.lui", "d,Cu", MATCH_C_LUI, MASK_C_LUI, m_op }, { "c.srli", "Cs,C>", MATCH_C_SRLI, MASK_C_SRLI, m_op }, { "c.srai", "Cs,C>", MATCH_C_SRAI, MASK_C_SRAI, m_op }, { "c.andi", "Cs,Co", MATCH_C_ANDI, MASK_C_ANDI, m_op }, { "c.sub", "Cs,Ct", MATCH_C_SUB, MASK_C_SUB, m_op }, { "c.xor", "Cs,Ct", MATCH_C_XOR, MASK_C_XOR, m_op }, { "c.or", "Cs,Ct", MATCH_C_OR, MASK_C_OR, m_op }, { "c.and", "Cs,Ct", MATCH_C_AND, MASK_C_AND, m_op }, { "c.subw", "Cs,Ct", MATCH_C_SUBW, MASK_C_SUBW, m_op }, { "c.addw", "Cs,Ct", MATCH_C_ADDW, MASK_C_ADDW, m_op }, { "c.j", "Ca", MATCH_C_J, MASK_C_J, m_op }, { "c.beqz", "Cs,Cp", MATCH_C_BEQZ, MASK_C_BEQZ, m_op }, { "c.bnez", "Cs,Cp", MATCH_C_BNEZ, MASK_C_BNEZ, m_op }, { "c.slli", "d,C>", MATCH_C_SLLI, MASK_C_SLLI, m_op }, { "c.fldsp", "D,Cn(Cc)", MATCH_C_FLDSP, MASK_C_FLDSP, m_op }, { "c.lwsp", "d,Cm(Cc)", MATCH_C_LWSP, MASK_C_LWSP, m_op }, { "c.flwsp", "D,Cm(Cc)", MATCH_C_FLWSP, MASK_C_FLWSP, m_op }, { "c.mv", "d,CV", MATCH_C_MV, MASK_C_MV, m_op }, { "c.add", "d,CV", MATCH_C_ADD, MASK_C_ADD, m_op }, { "c.fsdsp", "CT,CN(Cc)", MATCH_C_FSDSP, MASK_C_FSDSP, m_op }, { "c.swsp", "CV,CM(Cc)", MATCH_C_SWSP, MASK_C_SWSP, m_op }, { "c.fswsp", "CT,CM(Cc)", MATCH_C_FSWSP, MASK_C_FSWSP, m_op }, { NULL, NULL, 0, 0, NULL }, }; static int oprint(struct riscv_op *op, vm_offset_t loc, int insn) { uint32_t rd, rs1, rs2, rs3; uint32_t val; const char *csr_name; int imm; char *p; p = op->fmt; rd = (insn & RD_MASK) >> RD_SHIFT; rs1 = (insn & RS1_MASK) >> RS1_SHIFT; rs2 = (insn & RS2_MASK) >> RS2_SHIFT; db_printf("%s\t", op->name); while (*p) { switch (*p) { case 'C': /* C-Compressed ISA extension */ switch (*++p) { case 't': rd = (insn >> 2) & 0x7; rd += 0x8; db_printf("%s", reg_name[rd]); break; case 's': rs2 = (insn >> 7) & 0x7; rs2 += 0x8; db_printf("%s", reg_name[rs2]); break; case 'l': imm = ((insn >> 10) & 0x7) << 3; imm |= ((insn >> 5) & 0x3) << 6; if (imm & (1 << 8)) imm |= 0xffffff << 8; db_printf("%d", imm); break; case 'k': imm = ((insn >> 10) & 0x7) << 3; imm |= ((insn >> 6) & 0x1) << 2; imm |= ((insn >> 5) & 0x1) << 6; if (imm & (1 << 8)) imm |= 0xffffff << 8; db_printf("%d", imm); break; case 'c': db_printf("sp"); break; case 'n': imm = ((insn >> 5) & 0x3) << 3; imm |= ((insn >> 12) & 0x1) << 5; imm |= ((insn >> 2) & 0x7) << 6; if (imm & (1 << 8)) imm |= 0xffffff << 8; db_printf("%d", imm); break; case 'N': imm = ((insn >> 10) & 0x7) << 3; imm |= ((insn >> 7) & 0x7) << 6; if (imm & (1 << 8)) imm |= 0xffffff << 8; db_printf("%d", imm); break; case 'u': imm = ((insn >> 2) & 0x1f) << 0; imm |= ((insn >> 12) & 0x1) << 5; if (imm & (1 << 5)) imm |= (0x7ffffff << 5); /* sign ext */ - db_printf("0x%lx", imm); + db_printf("0x%x", imm); break; case 'o': imm = ((insn >> 2) & 0x1f) << 0; imm |= ((insn >> 12) & 0x1) << 5; if (imm & (1 << 5)) imm |= (0x7ffffff << 5); /* sign ext */ db_printf("%d", imm); break; case 'a': /* imm[11|4|9:8|10|6|7|3:1|5] << 2 */ imm = ((insn >> 3) & 0x7) << 1; imm |= ((insn >> 11) & 0x1) << 4; imm |= ((insn >> 2) & 0x1) << 5; imm |= ((insn >> 7) & 0x1) << 6; imm |= ((insn >> 6) & 0x1) << 7; imm |= ((insn >> 9) & 0x3) << 8; imm |= ((insn >> 8) & 0x1) << 10; imm |= ((insn >> 12) & 0x1) << 11; if (imm & (1 << 11)) imm |= (0xfffff << 12); /* sign ext */ db_printf("0x%lx", (loc + imm)); break; case 'V': rs2 = (insn >> 2) & 0x1f; db_printf("%s", reg_name[rs2]); break; case '>': imm = ((insn >> 2) & 0x1f) << 0; imm |= ((insn >> 12) & 0x1) << 5; db_printf("%d", imm); }; break; case 'd': db_printf("%s", reg_name[rd]); break; case 'D': db_printf("%s", fp_reg_name[rd]); break; case 's': db_printf("%s", reg_name[rs1]); break; case 'S': db_printf("%s", fp_reg_name[rs1]); break; case 't': db_printf("%s", reg_name[rs2]); break; case 'T': db_printf("%s", fp_reg_name[rs2]); break; case 'R': rs3 = (insn >> 27) & 0x1f; db_printf("%s", fp_reg_name[rs3]); break; case 'Z': imm = (insn >> 15) & 0x1f; db_printf("%d", imm); break; case 'p': imm = ((insn >> 8) & 0xf) << 1; imm |= ((insn >> 25) & 0x3f) << 5; imm |= ((insn >> 7) & 0x1) << 11; imm |= ((insn >> 31) & 0x1) << 12; if (imm & (1 << 12)) imm |= (0xfffff << 12); /* sign extend */ db_printf("0x%016lx", (loc + imm)); break; case '(': case ')': case '[': case ']': case ',': db_printf("%c", *p); break; case '0': if (!p[1]) db_printf("%c", *p); break; case 'o': imm = (insn >> 20) & 0xfff; if (imm & (1 << 11)) imm |= (0xfffff << 12); /* sign extend */ db_printf("%d", imm); break; case 'q': imm = (insn >> 7) & 0x1f; imm |= ((insn >> 25) & 0x7f) << 5; if (imm & (1 << 11)) imm |= (0xfffff << 12); /* sign extend */ db_printf("%d", imm); break; case 'a': /* imm[20|10:1|11|19:12] << 12 */ imm = ((insn >> 21) & 0x3ff) << 1; imm |= ((insn >> 20) & 0x1) << 11; imm |= ((insn >> 12) & 0xff) << 12; imm |= ((insn >> 31) & 0x1) << 20; if (imm & (1 << 20)) imm |= (0xfff << 20); /* sign extend */ db_printf("0x%lx", (loc + imm)); break; case 'u': /* imm[31:12] << 12 */ imm = (insn >> 12) & 0xfffff; if (imm & (1 << 20)) imm |= (0xfff << 20); /* sign extend */ - db_printf("0x%lx", imm); + db_printf("0x%x", imm); break; case 'j': /* imm[11:0] << 20 */ imm = (insn >> 20) & 0xfff; if (imm & (1 << 11)) imm |= (0xfffff << 12); /* sign extend */ db_printf("%d", imm); break; case '>': val = (insn >> 20) & 0x3f; db_printf("0x%x", val); break; case '<': val = (insn >> 20) & 0x1f; db_printf("0x%x", val); break; case 'E': val = (insn >> 20) & 0xfff; csr_name = NULL; switch (val) { #define DECLARE_CSR(name, num) case num: csr_name = #name; break; #include "machine/encoding.h" #undef DECLARE_CSR } if (csr_name) db_printf("%s", csr_name); else db_printf("0x%x", val); break; case 'P': if (insn & (1 << 27)) db_printf("i"); if (insn & (1 << 26)) db_printf("o"); if (insn & (1 << 25)) db_printf("r"); if (insn & (1 << 24)) db_printf("w"); break; case 'Q': if (insn & (1 << 23)) db_printf("i"); if (insn & (1 << 22)) db_printf("o"); if (insn & (1 << 21)) db_printf("r"); if (insn & (1 << 20)) db_printf("w"); break; } p++; } return (0); } vm_offset_t db_disasm(vm_offset_t loc, bool altfmt) { struct riscv_op *op; uint32_t insn; int j; insn = db_get_value(loc, 4, 0); for (j = 0; riscv_opcodes[j].name != NULL; j++) { op = &riscv_opcodes[j]; if (op->match_func(op, insn)) { oprint(op, loc, insn); return(loc + 4); } }; insn = db_get_value(loc, 2, 0); for (j = 0; riscv_c_opcodes[j].name != NULL; j++) { op = &riscv_c_opcodes[j]; if (op->match_func(op, insn)) { oprint(op, loc, insn); break; } }; return(loc + 2); } Index: head/sys/riscv/riscv/intr_machdep.c =================================================================== --- head/sys/riscv/riscv/intr_machdep.c (revision 365454) +++ head/sys/riscv/riscv/intr_machdep.c (revision 365455) @@ -1,278 +1,278 @@ /*- * Copyright (c) 2015-2017 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif void intr_irq_handler(struct trapframe *tf); struct intc_irqsrc { struct intr_irqsrc isrc; u_int irq; }; struct intc_irqsrc isrcs[INTC_NIRQS]; static void riscv_mask_irq(void *source) { - uintptr_t irq; + int irq; - irq = (uintptr_t)source; + irq = (int)(uintptr_t)source; switch (irq) { case IRQ_TIMER_SUPERVISOR: csr_clear(sie, SIE_STIE); break; case IRQ_SOFTWARE_USER: csr_clear(sie, SIE_USIE); break; case IRQ_SOFTWARE_SUPERVISOR: csr_clear(sie, SIE_SSIE); break; default: panic("Unknown irq %d\n", irq); } } static void riscv_unmask_irq(void *source) { - uintptr_t irq; + int irq; - irq = (uintptr_t)source; + irq = (int)(uintptr_t)source; switch (irq) { case IRQ_TIMER_SUPERVISOR: csr_set(sie, SIE_STIE); break; case IRQ_SOFTWARE_USER: csr_set(sie, SIE_USIE); break; case IRQ_SOFTWARE_SUPERVISOR: csr_set(sie, SIE_SSIE); break; default: panic("Unknown irq %d\n", irq); } } int riscv_setup_intr(const char *name, driver_filter_t *filt, void (*handler)(void*), void *arg, int irq, int flags, void **cookiep) { struct intr_irqsrc *isrc; int error; if (irq < 0 || irq >= INTC_NIRQS) panic("%s: unknown intr %d", __func__, irq); isrc = &isrcs[irq].isrc; if (isrc->isrc_event == NULL) { error = intr_event_create(&isrc->isrc_event, isrc, 0, irq, riscv_mask_irq, riscv_unmask_irq, NULL, NULL, "int%d", irq); if (error) return (error); riscv_unmask_irq((void*)(uintptr_t)irq); } error = intr_event_add_handler(isrc->isrc_event, name, filt, handler, arg, intr_priority(flags), flags, cookiep); if (error) { printf("Failed to setup intr: %d\n", irq); return (error); } return (0); } int riscv_teardown_intr(void *ih) { /* TODO */ return (0); } void riscv_cpu_intr(struct trapframe *frame) { struct intr_irqsrc *isrc; int active_irq; critical_enter(); KASSERT(frame->tf_scause & EXCP_INTR, ("riscv_cpu_intr: wrong frame passed")); active_irq = frame->tf_scause & EXCP_MASK; switch (active_irq) { case IRQ_SOFTWARE_USER: case IRQ_SOFTWARE_SUPERVISOR: case IRQ_TIMER_SUPERVISOR: isrc = &isrcs[active_irq].isrc; if (intr_isrc_dispatch(isrc, frame) != 0) printf("stray interrupt %d\n", active_irq); break; case IRQ_EXTERNAL_SUPERVISOR: intr_irq_handler(frame); break; default: break; } critical_exit(); } #ifdef SMP void riscv_setup_ipihandler(driver_filter_t *filt) { riscv_setup_intr("ipi", filt, NULL, NULL, IRQ_SOFTWARE_SUPERVISOR, INTR_TYPE_MISC, NULL); } void riscv_unmask_ipi(void) { csr_set(sie, SIE_SSIE); } /* Sending IPI */ static void ipi_send(struct pcpu *pc, int ipi) { u_long mask; CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, pc->pc_cpuid, ipi); atomic_set_32(&pc->pc_pending_ipis, ipi); mask = (1 << pc->pc_hart); sbi_send_ipi(&mask); CTR1(KTR_SMP, "%s: sent", __func__); } void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); ipi_selected(other_cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); ipi_send(cpuid_to_pcpu[cpu], ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { struct pcpu *pc; u_long mask; CTR1(KTR_SMP, "ipi_selected: ipi: %x", ipi); mask = 0; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (CPU_ISSET(pc->pc_cpuid, &cpus)) { CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, ipi); atomic_set_32(&pc->pc_pending_ipis, ipi); mask |= (1 << pc->pc_hart); } } sbi_send_ipi(&mask); } #endif /* Interrupt machdep initialization routine. */ static void intc_init(void *dummy __unused) { int error; int i; for (i = 0; i < INTC_NIRQS; i++) { isrcs[i].irq = i; error = intr_isrc_register(&isrcs[i].isrc, NULL, 0, "intc,%u", i); if (error != 0) printf("Can't register interrupt %d\n", i); } } SYSINIT(intc_init, SI_SUB_INTR, SI_ORDER_MIDDLE, intc_init, NULL); Index: head/sys/riscv/riscv/pmap.c =================================================================== --- head/sys/riscv/riscv/pmap.c (revision 365454) +++ head/sys/riscv/riscv/pmap.c (revision 365455) @@ -1,4635 +1,4635 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Portions of this software were developed by Andrew Turner under * sponsorship from The FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUL1E (Ln_ENTRIES * Ln_ENTRIES) #define NUL2E (Ln_ENTRIES * NUL1E) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) /* The list of all the user pmaps */ LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps = LIST_HEAD_INITIALIZER(); struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; vm_paddr_t dmap_phys_base; /* The start of the dmap region */ vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ /* This code assumes all L1 DMAP entries will be used */ CTASSERT((DMAP_MIN_ADDRESS & ~L1_OFFSET) == DMAP_MIN_ADDRESS); CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_ADDRESS); static struct rwlock_padalign pvh_global_lock; static struct mtx_padalign allpmaps_lock; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "VM/pmap parameters"); static int superpages_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, CTLFLAG_RDTUN, &superpages_enabled, 0, "Enable support for transparent superpages"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "2MB page mapping counters"); static u_long pmap_l2_demotions; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, &pmap_l2_demotions, 0, "2MB page demotions"); static u_long pmap_l2_mappings; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD, &pmap_l2_mappings, 0, "2MB page mappings"); static u_long pmap_l2_p_failures; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_l2_p_failures, 0, "2MB page promotion failures"); static u_long pmap_l2_promotions; SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, &pmap_l2_promotions, 0, "2MB page promotions"); /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static struct md_page *pv_table; static struct md_page pv_dummy; extern cpuset_t all_harts; /* * Internal flags for pmap_enter()'s helper functions. */ #define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ #define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va); static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp); static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); #define pmap_clear(pte) pmap_store(pte, 0) #define pmap_clear_bits(pte, bits) atomic_clear_64(pte, bits) #define pmap_load_store(pte, entry) atomic_swap_64(pte, entry) #define pmap_load_clear(pte) pmap_load_store(pte, 0) #define pmap_load(pte) atomic_load_64(pte) #define pmap_store(pte, entry) atomic_store_64(pte, entry) #define pmap_store_bits(pte, bits) atomic_set_64(pte, bits) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } static __inline void pagezero(void *p) { bzero(p, PAGE_SIZE); } #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) #define PTE_TO_PHYS(pte) \ ((((pte) & ~PTE_HI_MASK) >> PTE_PPN0_S) * PAGE_SIZE) static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { vm_paddr_t phys; pd_entry_t *l2; phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & PTE_V) == 0) return (NULL); if ((pmap_load(l1) & PTE_RX) != 0) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { vm_paddr_t phys; pt_entry_t *l3; phys = PTE_TO_PHYS(pmap_load(l2)); l3 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l3[pmap_l3_index(va)]); } static __inline pt_entry_t * pmap_l3(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2; l2 = pmap_l2(pmap, va); if (l2 == NULL) return (NULL); if ((pmap_load(l2) & PTE_V) == 0) return (NULL); if ((pmap_load(l2) & PTE_RX) != 0) return (NULL); return (pmap_l2_to_l3(l2, va)); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static void pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index, pt_entry_t entry) { struct pmap *user_pmap; pd_entry_t *l1; /* Distribute new kernel L1 entry to all the user pmaps */ if (pmap != kernel_pmap) return; mtx_lock(&allpmaps_lock); LIST_FOREACH(user_pmap, &allpmaps, pm_list) { l1 = &user_pmap->pm_l1[l1index]; pmap_store(l1, entry); } mtx_unlock(&allpmaps_lock); } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & PTE_RX) == 0, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; vm_paddr_t ret; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); /* Check locore has used L2 superpages */ KASSERT((l2[l2_slot] & PTE_RX) != 0, ("Invalid bootstrap L2 table")); /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); return (ret); } static void pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) { vm_offset_t va; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; pt_entry_t entry; pn_t pn; pa = dmap_phys_base = min_pa & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; l1 = (pd_entry_t *)kern_l1; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); for (; va < DMAP_MAX_ADDRESS && pa < max_pa; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); /* superpages */ pn = (pa / PAGE_SIZE); entry = PTE_KERN; entry |= (pn << PTE_PPN0_S); pmap_store(&l1[l1_slot], entry); } /* Set the upper limit of the DMAP region */ dmap_phys_max = pa; dmap_max_addr = va; sfence_vma(); } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l3pt; pt_entry_t entry; pd_entry_t *l2; vm_paddr_t pa; u_int l2_slot; pn_t pn; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pn = (pa / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(&l2[l2_slot], entry); l3pt += PAGE_SIZE; } /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); return (l3pt); } /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot; vm_offset_t freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t max_pa, min_pa, pa; pt_entry_t *l2p; int i; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; PMAP_LOCK_INIT(kernel_pmap); rw_init(&pvh_global_lock, "pmap pv global"); CPU_FILL(&kernel_pmap->pm_active); /* Assume the address we were loaded to is a valid physical address. */ min_pa = max_pa = kernstart; physmap_idx = physmem_avail(physmap, nitems(physmap)); physmap_idx /= 2; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < physmap_idx * 2; i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; if (physmap[i + 1] > max_pa) max_pa = physmap[i + 1]; } - printf("physmap_idx %lx\n", physmap_idx); + printf("physmap_idx %u\n", physmap_idx); printf("min_pa %lx\n", min_pa); printf("max_pa %lx\n", max_pa); /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa, max_pa); /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ (void)pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); freemempos = roundup2(KERNBASE + kernlen, PAGE_SIZE); /* Create the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); /* * Invalidate the mapping we created for the DTB. At this point a copy * has been created, and we no longer need it. We want to avoid the * possibility of an aliased mapping in the future. */ l2p = pmap_l2(kernel_pmap, VM_EARLY_DTB_ADDRESS); if ((pmap_load(l2p) & PTE_V) != 0) pmap_clear(l2p); sfence_vma(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L2_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); physmem_exclude_region(kernstart, pa - kernstart, EXFLAG_NOALLOC); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { vm_size_t s; int i, pv_npg; /* * Initialize the pv chunk and pmap list mutexes. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); /* * Calculate the size of the pv head table for superpages. */ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); TAILQ_INIT(&pv_dummy.pv_list); if (superpages_enabled) pagesizes[1] = L2_SIZE; } #ifdef SMP /* * For SMP, these functions have to use IPIs for coherence. * * In general, the calling thread uses a plain fence to order the * writes to the page tables before invoking an SBI callback to invoke * sfence_vma() on remote CPUs. */ static void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { cpuset_t mask; sched_pin(); mask = pmap->pm_active; CPU_CLR(PCPU_GET(hart), &mask); fence(); if (!CPU_EMPTY(&mask) && smp_started) sbi_remote_sfence_vma(mask.__bits, va, 1); sfence_vma_page(va); sched_unpin(); } static void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { cpuset_t mask; sched_pin(); mask = pmap->pm_active; CPU_CLR(PCPU_GET(hart), &mask); fence(); if (!CPU_EMPTY(&mask) && smp_started) sbi_remote_sfence_vma(mask.__bits, sva, eva - sva + 1); /* * Might consider a loop of sfence_vma_page() for a small * number of pages in the future. */ sfence_vma(); sched_unpin(); } static void pmap_invalidate_all(pmap_t pmap) { cpuset_t mask; sched_pin(); mask = pmap->pm_active; CPU_CLR(PCPU_GET(hart), &mask); /* * XXX: The SBI doc doesn't detail how to specify x0 as the * address to perform a global fence. BBL currently treats * all sfence_vma requests as global however. */ fence(); if (!CPU_EMPTY(&mask) && smp_started) sbi_remote_sfence_vma(mask.__bits, 0, 0); sfence_vma(); sched_unpin(); } #else /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ static __inline void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { sfence_vma_page(va); } static __inline void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { /* * Might consider a loop of sfence_vma_page() for a small * number of pages in the future. */ sfence_vma(); } static __inline void pmap_invalidate_all(pmap_t pmap) { sfence_vma(); } #endif /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; pa = 0; PMAP_LOCK(pmap); /* * Start with the l2 tabel. We are unable to allocate * pages in the l1 table. */ l2p = pmap_l2(pmap, va); if (l2p != NULL) { l2 = pmap_load(l2p); if ((l2 & PTE_RX) == 0) { l3p = pmap_l2_to_l3(l2p, va); if (l3p != NULL) { l3 = pmap_load(l3p); pa = PTE_TO_PHYS(l3); pa |= (va & L3_OFFSET); } } else { /* L2 is superpages */ pa = (l2 >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; vm_page_t m; m = NULL; PMAP_LOCK(pmap); l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) { phys = PTE_TO_PHYS(l3); m = PHYS_TO_VM_PAGE(phys); if (!vm_page_wire_mapped(m)) m = NULL; } } PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { l2 = pmap_l2(kernel_pmap, va); if (l2 == NULL) panic("pmap_kextract: No l2"); if ((pmap_load(l2) & PTE_RX) != 0) { /* superpages */ pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); return (pa); } l3 = pmap_l2_to_l3(l2, va); if (l3 == NULL) panic("pmap_kextract: No l3..."); pa = PTE_TO_PHYS(pmap_load(l3)); pa |= (va & PAGE_MASK); } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { pt_entry_t entry; pt_entry_t *l3; vm_offset_t va; pn_t pn; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pn = (pa / PAGE_SIZE); entry = PTE_KERN; entry |= (pn << PTE_PPN0_S); pmap_store(l3, entry); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *l3; l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); pmap_clear(l3); sfence_vma(); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *l3; vm_offset_t va; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pmap_clear(l3); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *l3, pa; vm_offset_t va; vm_page_t m; pt_entry_t entry; pn_t pn; int i; va = sva; for (i = 0; i < count; i++) { m = ma[i]; pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); l3 = pmap_l3(kernel_pmap, va); entry = PTE_KERN; entry |= (pn << PTE_PPN0_S); pmap_store(l3, entry); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *l3; vm_offset_t va; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); for (va = sva; count-- > 0; va += PAGE_SIZE) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); pmap_clear(l3); } pmap_invalidate_range(kernel_pmap, sva, va); } bool pmap_ps_enabled(pmap_t pmap __unused) { return (superpages_enabled); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. * * If "promoted" is false, then the page table page "ml3" must be zero filled. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3, bool promoted) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); ml3->valid = promoted ? VM_PAGE_BITS_ALL : 0; return (vm_radix_insert(&pmap->pm_root, ml3)); } /* * Removes the page table page mapping the specified virtual address from the * specified pmap's collection of idle page table pages, and returns it. * Otherwise, returns NULL if there is no page table page corresponding to the * specified virtual address. */ static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); } /* * Decrements a page table page's reference count, which is used to record the * number of valid page table entries within the page. If the reference count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->ref_count; if (m->ref_count == 0) { _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else { return (FALSE); } } static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (m->pindex >= NUL1E) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_clear(l1); pmap_distribute_l1(pmap, pmap_l1_index(va), 0); } else { pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_clear(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUL1E) { pd_entry_t *l1; vm_page_t pdpg; l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pmap_unwire_ptp(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); vm_wire_sub(1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the reference count. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde)); return (pmap_unwire_ptp(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; pmap->pm_satp = SATP_MODE_SV39 | (vtophys(pmap->pm_l1) >> PAGE_SHIFT); CPU_ZERO(&pmap->pm_active); pmap_activate_boot(pmap); } int pmap_pinit(pmap_t pmap) { vm_paddr_t l1phys; vm_page_t l1pt; /* * allocate the l1 page */ while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) vm_wait(NULL); l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); pmap->pm_satp = SATP_MODE_SV39 | (l1phys >> PAGE_SHIFT); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); CPU_ZERO(&pmap->pm_active); /* Install kernel pagetables */ memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); /* Add to the list of all user pmaps */ mtx_lock(&allpmaps_lock); LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); mtx_unlock(&allpmaps_lock); vm_radix_init(&pmap->pm_root); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, /*pdppg, */pdpg; pt_entry_t entry; vm_paddr_t phys; pn_t pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); vm_wait(NULL); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= NUL1E) { pd_entry_t *l1; vm_pindex_t l1index; l1index = ptepindex - NUL1E; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l1, entry); pmap_distribute_l1(pmap, l1index, entry); } else { vm_pindex_t l1index; pd_entry_t *l1, *l2; l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUL1E + l1index, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); return (NULL); } } else { phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pdpg->ref_count++; } phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l2, entry); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { pd_entry_t *l1; vm_page_t l2pg; vm_pindex_t l2pindex; retry: l1 = pmap_l1(pmap, va); if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) { /* Add a reference to the L2 page. */ l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1))); l2pg->ref_count++; } else { /* Allocate a L2 page. */ l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); if (l2pg == NULL && lockp != NULL) goto retry; } return (l2pg); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *l2; vm_paddr_t phys; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); m = PHYS_TO_VM_PAGE(phys); m->ref_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); mtx_lock(&allpmaps_lock); LIST_REMOVE(pmap, pm_list); mtx_unlock(&allpmaps_lock); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); vm_page_unwire_noq(m); vm_page_free(m); } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, kvm_free, "LU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l1, *l2; pt_entry_t entry; pn_t pn; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= vm_map_max(kernel_map)) addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { l1 = pmap_l1(kernel_pmap, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l1, entry); pmap_distribute_l1(kernel_pmap, pmap_l1_index(kernel_vm_end), entry); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & PTE_V) != 0 && (pmap_load(l2) & PTE_RWX) == 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) { pmap_zero_page(nkpg); } paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_store(l2, entry); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("RISCVTODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire_noq(m); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * Ensure that the number of spare PV entries in the specified pmap meets or * exceeds the given count, "needed". * * The given PV list lock may be released. */ static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; vm_page_t m; int avail, free; bool reclaimed; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); /* * Newly allocated PV chunks must be stored in a private list until * the required number of PV chunks have been allocated. Otherwise, * reclaim_pv_chunk() could recycle one of these chunks. In * contrast, these chunks must be added to the pmap upon allocation. */ TAILQ_INIT(&new_tail); retry: avail = 0; TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { bit_count((bitstr_t *)pc->pc_map, 0, sizeof(pc->pc_map) * NBBY, &free); if (free == 0) break; avail += free; if (avail >= needed) break; } for (reclaimed = false; avail < needed; avail += _NPCPV) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; reclaimed = true; } /* XXX PV STATS */ #if 0 dump_add_page(m->phys_addr); #endif pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0; pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); /* * The reclaim might have freed a chunk from the current pmap. * If that chunk contained available entries, we need to * re-count the number of available entries. */ if (reclaimed) goto retry; } if (!TAILQ_EMPTY(&new_tail)) { mtx_lock(&pv_chunks_mutex); TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); } } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va)); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * After demotion from a 2MB page mapping to 512 4KB page mappings, * destroy the pv entry for the 2MB page mapping and reinstantiate the pv * entries for each of the 4KB page mappings. */ static void __unused pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; struct pv_chunk *pc; pv_entry_t pv; vm_page_t m; vm_offset_t va_last; int bit, field; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the 2mpage's pv entry for this mapping to the first * page's pv list. Once this transfer begins, the pv list lock * must not be released until the last pv entry is reinstantiated. */ pvh = pa_to_pvh(pa); va &= ~L2_OFFSET; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; /* Instantiate the remaining 511 pv entries. */ va_last = va + L2_SIZE - PAGE_SIZE; for (;;) { pc = TAILQ_FIRST(&pmap->pm_pvchunk); KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); for (field = 0; field < _NPCM; field++) { while (pc->pc_map[field] != 0) { bit = ffsl(pc->pc_map[field]) - 1; pc->pc_map[field] &= ~(1ul << bit); pv = &pc->pc_pventry[field * 64 + bit]; va += PAGE_SIZE; pv->pv_va = va; m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_l2: page %p is not managed", m)); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (va == va_last) goto out; } } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } out: if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } /* XXX PV stats */ } #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_page_t m; vm_offset_t va_last; rw_assert(&pvh_global_lock, RA_LOCKED); KASSERT((va & L2_OFFSET) == 0, ("pmap_pv_promote_l2: misaligned va %#lx", va)); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); m = PHYS_TO_VM_PAGE(pa); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va)); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; va_last = va + L2_SIZE - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } #endif /* VM_NRESERVLEVEL > 0 */ /* * Create the PV entry for a 2MB page mapping. Always returns true unless the * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns * false if the PV entry cannot be allocated without resorting to reclamation. */ static bool pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_paddr_t pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? NULL : lockp)) == NULL) return (false); pv->pv_va = va; pa = PTE_TO_PHYS(l2e); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; return (true); } static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) { pt_entry_t newl2, oldl2; vm_page_t ml3; vm_paddr_t ml3pa; KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va)); KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); ml3 = pmap_remove_pt_page(pmap, va); if (ml3 == NULL) panic("pmap_remove_kernel_l2: Missing pt page"); ml3pa = VM_PAGE_TO_PHYS(ml3); newl2 = ml3pa | PTE_V; /* * If this page table page was unmapped by a promotion, then it * contains valid mappings. Zero it to invalidate those mappings. */ if (ml3->valid != 0) pagezero((void *)PHYS_TO_DMAP(ml3pa)); /* * Demote the mapping. */ oldl2 = pmap_load_store(l2, newl2); KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx", __func__, l2, oldl2)); } /* * pmap_remove_l2: Do the things to unmap a level 2 superpage. */ static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t oldl2; vm_offset_t eva, va; vm_page_t m, ml3; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); oldl2 = pmap_load_clear(l2); KASSERT((oldl2 & PTE_RWX) != 0, ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2)); /* * The sfence.vma documentation states that it is sufficient to specify * a single address within a superpage mapping. However, since we do * not perform any invalidation upon promotion, TLBs may still be * caching 4KB mappings within the superpage, so we must invalidate the * entire range. */ pmap_invalidate_range(pmap, sva, sva + L2_SIZE); if ((oldl2 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); if ((oldl2 & PTE_SW_MANAGED) != 0) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2)); pvh = pa_to_pvh(PTE_TO_PHYS(oldl2)); pmap_pvh_free(pvh, pmap, sva); eva = sva + L2_SIZE; for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2)); va < eva; va += PAGE_SIZE, m++) { if ((oldl2 & PTE_D) != 0) vm_page_dirty(m); if ((oldl2 & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } if (pmap == kernel_pmap) { pmap_remove_kernel_l2(pmap, l2, sva); } else { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { KASSERT(ml3->valid == VM_PAGE_BITS_ALL, ("pmap_remove_l2: l3 page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(ml3->ref_count == Ln_ENTRIES, ("pmap_remove_l2: l3 page ref count error")); ml3->ref_count = 1; vm_page_unwire_noq(ml3); pmap_add_delayed_free_list(ml3, free, FALSE); } } return (pmap_unuse_pt(pmap, sva, l1e, free)); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t old_l3; vm_paddr_t phys; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); old_l3 = pmap_load_clear(l3); pmap_invalidate_page(pmap, va); if (old_l3 & PTE_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & PTE_SW_MANAGED) { phys = PTE_TO_PHYS(old_l3); m = PHYS_TO_VM_PAGE(phys); if ((old_l3 & PTE_D) != 0) vm_page_dirty(m); if (old_l3 & PTE_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } return (pmap_unuse_pt(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct spglist free; struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l1, *l2, l2e; pt_entry_t *l3; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; if ((l2e = pmap_load(l2)) == 0) continue; if ((l2e & PTE_RWX) != 0) { if (sva + L2_SIZE == va_next && eva >= va_next) { (void)pmap_remove_l2(pmap, l2, sva, pmap_load(l1), &free, &lock); continue; } else if (!pmap_demote_l2_locked(pmap, l2, sva, &lock)) { /* * The large page mapping was destroyed. */ continue; } l2e = pmap_load(l2); } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct spglist free; struct md_page *pvh; pmap_t pmap; pt_entry_t *l3, l3e; pd_entry_t *l2, l2e; pv_entry_t pv; vm_offset_t va; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); va = pv->pv_va; l2 = pmap_l2(pmap, va); (void)pmap_demote_l2(pmap, l2, va); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); l2e = pmap_load(l2); KASSERT((l2e & PTE_RX) == 0, ("pmap_remove_all: found a superpage in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); l3e = pmap_load_clear(l3); pmap_invalidate_page(pmap, pv->pv_va); if (l3e & PTE_SW_WIRED) pmap->pm_stats.wired_count--; if ((l3e & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((l3e & PTE_D) != 0) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { pd_entry_t *l1, *l2, l2e; pt_entry_t *l3, l3e, mask; vm_page_t m, mt; vm_paddr_t pa; vm_offset_t va_next; bool anychanged, pv_lists_locked; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) return; anychanged = false; pv_lists_locked = false; mask = 0; if ((prot & VM_PROT_WRITE) == 0) mask |= PTE_W | PTE_D; if ((prot & VM_PROT_EXECUTE) == 0) mask |= PTE_X; resume: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL || (l2e = pmap_load(l2)) == 0) continue; if ((l2e & PTE_RWX) != 0) { if (sva + L2_SIZE == va_next && eva >= va_next) { retryl2: if ((prot & VM_PROT_WRITE) == 0 && (l2e & (PTE_SW_MANAGED | PTE_D)) == (PTE_SW_MANAGED | PTE_D)) { pa = PTE_TO_PHYS(l2e); m = PHYS_TO_VM_PAGE(pa); for (mt = m; mt < &m[Ln_ENTRIES]; mt++) vm_page_dirty(mt); } if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask)) goto retryl2; anychanged = true; continue; } else { if (!pv_lists_locked) { pv_lists_locked = true; if (!rw_try_rlock(&pvh_global_lock)) { if (anychanged) pmap_invalidate_all( pmap); PMAP_UNLOCK(pmap); rw_rlock(&pvh_global_lock); goto resume; } } if (!pmap_demote_l2(pmap, l2, sva)) { /* * The large page mapping was destroyed. */ continue; } } } if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { l3e = pmap_load(l3); retryl3: if ((l3e & PTE_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0 && (l3e & (PTE_SW_MANAGED | PTE_D)) == (PTE_SW_MANAGED | PTE_D)) { m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(l3e)); vm_page_dirty(m); } if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask)) goto retryl3; anychanged = true; } } if (anychanged) pmap_invalidate_all(pmap); if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } int pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype) { pd_entry_t *l2, l2e; pt_entry_t bits, *pte, oldpte; int rv; rv = 0; PMAP_LOCK(pmap); l2 = pmap_l2(pmap, va); if (l2 == NULL || ((l2e = pmap_load(l2)) & PTE_V) == 0) goto done; if ((l2e & PTE_RWX) == 0) { pte = pmap_l2_to_l3(l2, va); if (pte == NULL || ((oldpte = pmap_load(pte)) & PTE_V) == 0) goto done; } else { pte = l2; oldpte = l2e; } if ((pmap != kernel_pmap && (oldpte & PTE_U) == 0) || (ftype == VM_PROT_WRITE && (oldpte & PTE_W) == 0) || (ftype == VM_PROT_EXECUTE && (oldpte & PTE_X) == 0) || (ftype == VM_PROT_READ && (oldpte & PTE_R) == 0)) goto done; bits = PTE_A; if (ftype == VM_PROT_WRITE) bits |= PTE_D; /* * Spurious faults can occur if the implementation caches invalid * entries in the TLB, or if simultaneous accesses on multiple CPUs * race with each other. */ if ((oldpte & bits) != bits) pmap_store_bits(pte, bits); sfence_vma(); rv = 1; done: PMAP_UNLOCK(pmap); return (rv); } static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va) { struct rwlock *lock; bool rv; lock = NULL; rv = pmap_demote_l2_locked(pmap, l2, va, &lock); if (lock != NULL) rw_wunlock(lock); return (rv); } /* * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page * mapping is invalidated. */ static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp) { struct spglist free; vm_page_t mpte; pd_entry_t newl2, oldl2; pt_entry_t *firstl3, newl3; vm_paddr_t mptepa; int i; PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldl2 = pmap_load(l2); KASSERT((oldl2 & PTE_RWX) != 0, ("pmap_demote_l2_locked: oldl2 is not a leaf entry")); if ((oldl2 & PTE_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == NULL) { if ((oldl2 & PTE_A) == 0 || (mpte = vm_page_alloc(NULL, pmap_l2_pindex(va), (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); (void)pmap_remove_l2(pmap, l2, va & ~L2_OFFSET, pmap_load(pmap_l1(pmap, va)), &free, lockp); vm_page_free_pages_toq(&free, true); CTR2(KTR_PMAP, "pmap_demote_l2_locked: " "failure for va %#lx in pmap %p", va, pmap); return (false); } if (va < VM_MAXUSER_ADDRESS) { mpte->ref_count = Ln_ENTRIES; pmap_resident_count_inc(pmap, 1); } } mptepa = VM_PAGE_TO_PHYS(mpte); firstl3 = (pt_entry_t *)PHYS_TO_DMAP(mptepa); newl2 = ((mptepa / PAGE_SIZE) << PTE_PPN0_S) | PTE_V; KASSERT((oldl2 & PTE_A) != 0, ("pmap_demote_l2_locked: oldl2 is missing PTE_A")); KASSERT((oldl2 & (PTE_D | PTE_W)) != PTE_W, ("pmap_demote_l2_locked: oldl2 is missing PTE_D")); newl3 = oldl2; /* * If the page table page is not leftover from an earlier promotion, * initialize it. */ if (mpte->valid == 0) { for (i = 0; i < Ln_ENTRIES; i++) pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); } KASSERT(PTE_TO_PHYS(pmap_load(firstl3)) == PTE_TO_PHYS(newl3), ("pmap_demote_l2_locked: firstl3 and newl3 map different physical " "addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((pmap_load(firstl3) & PTE_PROMOTE) != (newl3 & PTE_PROMOTE)) for (i = 0; i < Ln_ENTRIES; i++) pmap_store(firstl3 + i, newl3 + (i << PTE_PPN0_S)); /* * The spare PV entries must be reserved prior to demoting the * mapping, that is, prior to changing the L2 entry. Otherwise, the * state of the L2 entry and the PV lists will be inconsistent, which * can result in reclaim_pv_chunk() attempting to remove a PV entry from * the wrong PV list and pmap_pv_demote_l2() failing to find the * expected PV entry for the 2MB page mapping that is being demoted. */ if ((oldl2 & PTE_SW_MANAGED) != 0) reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); /* * Demote the mapping. */ pmap_store(l2, newl2); /* * Demote the PV entry. */ if ((oldl2 & PTE_SW_MANAGED) != 0) pmap_pv_demote_l2(pmap, va, PTE_TO_PHYS(oldl2), lockp); atomic_add_long(&pmap_l2_demotions, 1); CTR2(KTR_PMAP, "pmap_demote_l2_locked: success for va %#lx in pmap %p", va, pmap); return (true); } #if VM_NRESERVLEVEL > 0 static void pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, struct rwlock **lockp) { pt_entry_t *firstl3, *l3; vm_paddr_t pa; vm_page_t ml3; PMAP_LOCK_ASSERT(pmap, MA_OWNED); va &= ~L2_OFFSET; KASSERT((pmap_load(l2) & PTE_RWX) == 0, ("pmap_promote_l2: invalid l2 entry %p", l2)); firstl3 = (pt_entry_t *)PHYS_TO_DMAP(PTE_TO_PHYS(pmap_load(l2))); pa = PTE_TO_PHYS(pmap_load(firstl3)); if ((pa & L2_OFFSET) != 0) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } pa += PAGE_SIZE; for (l3 = firstl3 + 1; l3 < firstl3 + Ln_ENTRIES; l3++) { if (PTE_TO_PHYS(pmap_load(l3)) != pa) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } if ((pmap_load(l3) & PTE_PROMOTE) != (pmap_load(firstl3) & PTE_PROMOTE)) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } pa += PAGE_SIZE; } ml3 = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); KASSERT(ml3->pindex == pmap_l2_pindex(va), ("pmap_promote_l2: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, ml3, true)) { CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx pmap %p", va, pmap); atomic_add_long(&pmap_l2_p_failures, 1); return; } if ((pmap_load(firstl3) & PTE_SW_MANAGED) != 0) pmap_pv_promote_l2(pmap, va, PTE_TO_PHYS(pmap_load(firstl3)), lockp); pmap_store(l2, pmap_load(firstl3)); atomic_add_long(&pmap_l2_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, pmap); } #endif /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct rwlock *lock; pd_entry_t *l1, *l2, l2e; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; pt_entry_t entry; pn_t l2_pn, l3_pn, pn; int rv; bool nosleep; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0) VM_PAGE_OBJECT_BUSY_ASSERT(m); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); new_l3 = PTE_V | PTE_R | PTE_A; if (prot & VM_PROT_EXECUTE) new_l3 |= PTE_X; if (flags & VM_PROT_WRITE) new_l3 |= PTE_D; if (prot & VM_PROT_WRITE) new_l3 |= PTE_W; if (va < VM_MAX_USER_ADDRESS) new_l3 |= PTE_U; new_l3 |= (pn << PTE_PPN0_S); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= PTE_SW_WIRED; /* * Set modified bit gratuitously for writeable mappings if * the page is unmanaged. We do not want to take a fault * to do the dirty bit accounting for these mappings. */ if ((m->oflags & VPO_UNMANAGED) != 0) { if (prot & VM_PROT_WRITE) new_l3 |= PTE_D; } else new_l3 |= PTE_SW_MANAGED; CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); lock = NULL; mpte = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (psind == 1) { /* Assert the required virtual and physical alignment. */ KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va %#lx unaligned", va)); KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); rv = pmap_enter_l2(pmap, va, new_l3, flags, m, &lock); goto out; } l2 = pmap_l2(pmap, va); if (l2 != NULL && ((l2e = pmap_load(l2)) & PTE_V) != 0 && ((l2e & PTE_RWX) == 0 || pmap_demote_l2_locked(pmap, l2, va, &lock))) { l3 = pmap_l2_to_l3(l2, va); if (va < VM_MAXUSER_ADDRESS) { mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); mpte->ref_count++; } } else if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } l3 = pmap_l3(pmap, va); } else { l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); l2_pn = (l2_pa / PAGE_SIZE); l1 = pmap_l1(pmap, va); entry = (PTE_V); entry |= (l2_pn << PTE_PPN0_S); pmap_store(l1, entry); pmap_distribute_l1(pmap, pmap_l1_index(va), entry); l2 = pmap_l1_to_l2(l1, va); } l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); l3_pn = (l3_pa / PAGE_SIZE); entry = (PTE_V); entry |= (l3_pn << PTE_PPN0_S); pmap_store(l2, entry); l3 = pmap_l2_to_l3(l2, va); } pmap_invalidate_page(pmap, va); } orig_l3 = pmap_load(l3); opa = PTE_TO_PHYS(orig_l3); pv = NULL; /* * Is the specified virtual address already mapped? */ if ((orig_l3 & PTE_V) != 0) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & PTE_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->ref_count--; KASSERT(mpte->ref_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & PTE_SW_MANAGED) != 0 && (new_l3 & PTE_W) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); goto validate; } /* * The physical page has changed. Temporarily invalidate * the mapping. This ensures that all threads sharing the * pmap keep a consistent view of the mapping, which is * necessary for the correct handling of COW faults. It * also permits reuse of the old mapping's PV entry, * avoiding an allocation. * * For consistency, handle unmanaged mappings the same way. */ orig_l3 = pmap_load_clear(l3); KASSERT(PTE_TO_PHYS(orig_l3) == opa, ("pmap_enter: unexpected pa update for %#lx", va)); if ((orig_l3 & PTE_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); /* * The pmap lock is sufficient to synchronize with * concurrent calls to pmap_page_test_mappings() and * pmap_ts_referenced(). */ if ((orig_l3 & PTE_D) != 0) vm_page_dirty(om); if ((orig_l3 & PTE_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pv = pmap_pvh_remove(&om->md, pmap, va); KASSERT(pv != NULL, ("pmap_enter: no PV entry for %#lx", va)); if ((new_l3 & PTE_SW_MANAGED) == 0) free_pv_entry(pmap, pv); if ((om->a.flags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } pmap_invalidate_page(pmap, va); orig_l3 = 0; } else { /* * Increment the counters. */ if ((new_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((new_l3 & PTE_SW_MANAGED) != 0) { if (pv == NULL) { pv = get_pv_entry(pmap, &lock); pv->pv_va = va; } CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((new_l3 & PTE_W) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } validate: /* * Sync the i-cache on all harts before updating the PTE * if the new PTE is executable. */ if (prot & VM_PROT_EXECUTE) pmap_sync_icache(pmap, va, PAGE_SIZE); /* * Update the L3 entry. */ if (orig_l3 != 0) { orig_l3 = pmap_load_store(l3, new_l3); pmap_invalidate_page(pmap, va); KASSERT(PTE_TO_PHYS(orig_l3) == pa, ("pmap_enter: invalid update")); if ((orig_l3 & (PTE_D | PTE_SW_MANAGED)) == (PTE_D | PTE_SW_MANAGED)) vm_page_dirty(m); } else { pmap_store(l3, new_l3); } #if VM_NRESERVLEVEL > 0 if (mpte != NULL && mpte->ref_count == Ln_ENTRIES && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_l2(pmap, l2, va, &lock); #endif rv = KERN_SUCCESS; out: if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (rv); } /* * Tries to create a read- and/or execute-only 2MB page mapping. Returns true * if successful. Returns false if (1) a page table page cannot be allocated * without sleeping, (2) a mapping already exists at the specified virtual * address, or (3) a PV entry cannot be allocated without reclaiming another * PV entry. */ static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { pd_entry_t new_l2; pn_t pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); pn = VM_PAGE_TO_PHYS(m) / PAGE_SIZE; new_l2 = (pd_entry_t)((pn << PTE_PPN0_S) | PTE_R | PTE_V); if ((m->oflags & VPO_UNMANAGED) == 0) new_l2 |= PTE_SW_MANAGED; if ((prot & VM_PROT_EXECUTE) != 0) new_l2 |= PTE_X; if (va < VM_MAXUSER_ADDRESS) new_l2 |= PTE_U; return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == KERN_SUCCESS); } /* * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and * a mapping already exists at the specified virtual address. Returns * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. * * The parameter "m" is only used when creating a managed, writeable mapping. */ static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, vm_page_t m, struct rwlock **lockp) { struct spglist free; pd_entry_t *l2, *l3, oldl2; vm_offset_t sva; vm_page_t l2pg, mt; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); l2 = &l2[pmap_l2_index(va)]; if ((oldl2 = pmap_load(l2)) != 0) { KASSERT(l2pg->ref_count > 1, ("pmap_enter_l2: l2pg's ref count is too low")); if ((flags & PMAP_ENTER_NOREPLACE) != 0) { l2pg->ref_count--; CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", va, pmap); return (KERN_FAILURE); } SLIST_INIT(&free); if ((oldl2 & PTE_RWX) != 0) (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, lockp); else for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { l3 = pmap_l2_to_l3(l2, sva); if ((pmap_load(l3) & PTE_V) != 0 && pmap_remove_l3(pmap, l3, sva, oldl2, &free, lockp) != 0) break; } vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { /* * Both pmap_remove_l2() and pmap_remove_l3() will * leave the kernel page table page zero filled. */ mt = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l2))); if (pmap_insert_pt_page(pmap, mt, false)) panic("pmap_enter_l2: trie insert failed"); } else KASSERT(pmap_load(l2) == 0, ("pmap_enter_l2: non-zero L2 entry %p", l2)); } if ((new_l2 & PTE_SW_MANAGED) != 0) { /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, l2pg, &free)) { /* * Although "va" is not mapped, paging-structure * caches could nonetheless have entries that * refer to the freed page table pages. * Invalidate those entries. */ pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, true); } CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } if ((new_l2 & PTE_W) != 0) for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) vm_page_aflag_set(mt, PGA_WRITEABLE); } /* * Increment counters. */ if ((new_l2 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; /* * Map the superpage. */ pmap_store(l2, new_l2); atomic_add_long(&pmap_l2_mappings, 1); CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p", va, pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && m->psind == 1 && pmap_ps_enabled(pmap) && pmap_enter_2mpage(pmap, va, m, prot, &lock)) m = &m[L2_SIZE / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; vm_paddr_t phys; pd_entry_t *l2; pt_entry_t *l3, newl3; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->ref_count++; } else { /* * Get the l2 entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); mpte = PHYS_TO_VM_PAGE(phys); mpte->ref_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; l3 = pmap_l3(kernel_pmap, va); } if (l3 == NULL) panic("pmap_enter_quick_locked: No l3"); if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->ref_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, false); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); newl3 = ((VM_PAGE_TO_PHYS(m) / PAGE_SIZE) << PTE_PPN0_S) | PTE_V | PTE_R; if ((prot & VM_PROT_EXECUTE) != 0) newl3 |= PTE_X; if ((m->oflags & VPO_UNMANAGED) == 0) newl3 |= PTE_SW_MANAGED; if (va < VM_MAX_USER_ADDRESS) newl3 |= PTE_U; /* * Sync the i-cache on all harts before updating the PTE * if the new PTE is executable. */ if (prot & VM_PROT_EXECUTE) pmap_sync_icache(pmap, va, PAGE_SIZE); pmap_store(l3, newl3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l1, *l2, l2e; pt_entry_t *l3, l3e; bool pv_lists_locked; pv_lists_locked = false; retry: PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if ((l2e = pmap_load(l2)) == 0) continue; if ((l2e & PTE_RWX) != 0) { if (sva + L2_SIZE == va_next && eva >= va_next) { if ((l2e & PTE_SW_WIRED) == 0) panic("pmap_unwire: l2 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)l2e); pmap_clear_bits(l2, PTE_SW_WIRED); continue; } else { if (!pv_lists_locked) { pv_lists_locked = true; if (!rw_try_rlock(&pvh_global_lock)) { PMAP_UNLOCK(pmap); rw_rlock(&pvh_global_lock); /* Repeat sva. */ goto retry; } } if (!pmap_demote_l2(pmap, l2, sva)) panic("pmap_unwire: demotion failed"); } } if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if ((l3e = pmap_load(l3)) == 0) continue; if ((l3e & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)l3e); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ pmap_clear_bits(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pmap_t pmap; pd_entry_t *l2; pt_entry_t *l3; pv_entry_t pv; int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); if ((pmap_load(l3) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l2 = pmap_l2(pmap, pv->pv_va); if ((pmap_load(l2) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Returns true if the given page is mapped individually or as part of * a 2mpage. Otherwise, returns false. */ bool pmap_page_is_mapped(vm_page_t m) { struct rwlock *lock; bool rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (false); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_runlock(lock); return (rv); } static void pmap_remove_pages_pv(pmap_t pmap, vm_page_t m, pv_entry_t pv, struct spglist *free, bool superpage) { struct md_page *pvh; vm_page_t mpte, mt; if (superpage) { pmap_resident_count_dec(pmap, Ln_ENTRIES); pvh = pa_to_pvh(m->phys_addr); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[Ln_ENTRIES]; mt++) if (TAILQ_EMPTY(&mt->md.pv_list) && (mt->a.flags & PGA_WRITEABLE) != 0) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { KASSERT(mpte->valid == VM_PAGE_BITS_ALL, ("pmap_remove_pages: pte page not promoted")); pmap_resident_count_dec(pmap, 1); KASSERT(mpte->ref_count == Ln_ENTRIES, ("pmap_remove_pages: pte page ref count error")); mpte->ref_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); } } else { pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (TAILQ_EMPTY(&m->md.pv_list) && (m->a.flags & PGA_WRITEABLE) != 0) { pvh = pa_to_pvh(m->phys_addr); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { struct spglist free; pd_entry_t ptepde; pt_entry_t *pte, tpte; vm_page_t m, mt; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; bool superpage; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_l1(pmap, pv->pv_va); ptepde = pmap_load(pte); pte = pmap_l1_to_l2(pte, pv->pv_va); tpte = pmap_load(pte); if ((tpte & PTE_RWX) != 0) { superpage = true; } else { ptepde = tpte; pte = pmap_l2_to_l3(pte, pv->pv_va); tpte = pmap_load(pte); superpage = false; } /* * We cannot remove wired pages from a * process' mapping at this time. */ if (tpte & PTE_SW_WIRED) { allfree = 0; continue; } m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad pte %#jx", (uintmax_t)tpte)); pmap_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) { if (superpage) for (mt = m; mt < &m[Ln_ENTRIES]; mt++) vm_page_dirty(mt); else vm_page_dirty(m); } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_remove_pages_pv(pmap, m, pv, &free, superpage); pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, false); } static bool pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct md_page *pvh; struct rwlock *lock; pd_entry_t *l2; pt_entry_t *l3, mask; pv_entry_t pv; pmap_t pmap; int md_gen, pvh_gen; bool rv; mask = 0; if (modified) mask |= PTE_D; if (accessed) mask |= PTE_A; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); rv = (pmap_load(l3) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l2 = pmap_l2(pmap, pv->pv_va); rv = (pmap_load(l2) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not busied then this check is racy. */ if (!pmap_page_is_write_mapped(m)) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *l3; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); l3 = pmap_l3(pmap, addr); if (l3 != NULL && pmap_load(l3) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pmap_t pmap; pd_entry_t *l2; pt_entry_t *l3, oldl3, newl3; pv_entry_t next_pv, pv; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); rw_rlock(&pvh_global_lock); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } va = pv->pv_va; l2 = pmap_l2(pmap, va); if ((pmap_load(l2) & PTE_W) != 0) (void)pmap_demote_l2_locked(pmap, l2, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } l3 = pmap_l3(pmap, pv->pv_va); oldl3 = pmap_load(l3); retry: if ((oldl3 & PTE_W) != 0) { newl3 = oldl3 & ~(PTE_D | PTE_W); if (!atomic_fcmpset_long(l3, &oldl3, newl3)) goto retry; if ((oldl3 & PTE_D) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). */ int pmap_ts_referenced(vm_page_t m) { struct spglist free; struct md_page *pvh; struct rwlock *lock; pv_entry_t pv, pvf; pmap_t pmap; pd_entry_t *l2, l2e; pt_entry_t *l3, l3e; vm_paddr_t pa; vm_offset_t va; int cleared, md_gen, not_cleared, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } va = pv->pv_va; l2 = pmap_l2(pmap, va); l2e = pmap_load(l2); if ((l2e & (PTE_W | PTE_D)) == (PTE_W | PTE_D)) { /* * Although l2e is mapping a 2MB page, because * this function is called at a 4KB page granularity, * we only update the 4KB page under test. */ vm_page_dirty(m); } if ((l2e & PTE_A) != 0) { /* * Since this reference bit is shared by 512 4KB * pages, it should not be cleared every time it is * tested. Apply a simple "hash" function on the * physical page number, the virtual superpage number, * and the pmap address to select one 4KB page out of * the 512 on which testing the reference bit will * result in clearing that reference bit. This * function is designed to avoid the selection of the * same 4KB page for every 2MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && (l2e & PTE_SW_WIRED) == 0) { pmap_clear_bits(l2, PTE_A); pmap_invalidate_page(pmap, va); cleared++; } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; } if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } l2 = pmap_l2(pmap, pv->pv_va); KASSERT((pmap_load(l2) & PTE_RX) == 0, ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); l3e = pmap_load(l3); if ((l3e & PTE_D) != 0) vm_page_dirty(m); if ((l3e & PTE_A) != 0) { if ((l3e & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_clear_bits(l3, PTE_A); pmap_invalidate_page(pmap, pv->pv_va); cleared++; } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); vm_page_free_pages_toq(&free, false); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t *l2, oldl2; pt_entry_t *l3; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); vm_page_assert_busied(m); if (!pmap_page_is_write_mapped(m)) return; /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->a.flags & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(&pvh_global_lock); rw_wlock(lock); restart: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } va = pv->pv_va; l2 = pmap_l2(pmap, va); oldl2 = pmap_load(l2); /* If oldl2 has PTE_W set, then it also has PTE_D set. */ if ((oldl2 & PTE_W) != 0 && pmap_demote_l2_locked(pmap, l2, va, &lock) && (oldl2 & PTE_SW_WIRED) == 0) { /* * Write protect the mapping to a single page so that * a subsequent write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - PTE_TO_PHYS(oldl2); l3 = pmap_l2_to_l3(l2, va); pmap_clear_bits(l3, PTE_D | PTE_W); vm_page_dirty(m); pmap_invalidate_page(pmap, va); } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l2 = pmap_l2(pmap, pv->pv_va); KASSERT((pmap_load(l2) & PTE_RWX) == 0, ("pmap_clear_modify: found a 2mpage in page %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); if ((pmap_load(l3) & (PTE_D | PTE_W)) == (PTE_D | PTE_W)) { pmap_clear_bits(l3, PTE_D | PTE_W); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); rw_runlock(&pvh_global_lock); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; } /* * Perform the pmap work for mincore(2). If the page is not both referenced and * modified by this pmap, returns its physical address so that the caller can * find other mappings. */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) { pt_entry_t *l2, *l3, tpte; vm_paddr_t pa; int val; bool managed; PMAP_LOCK(pmap); l2 = pmap_l2(pmap, addr); if (l2 != NULL && ((tpte = pmap_load(l2)) & PTE_V) != 0) { if ((tpte & PTE_RWX) != 0) { pa = PTE_TO_PHYS(tpte) | (addr & L2_OFFSET); val = MINCORE_INCORE | MINCORE_PSIND(1); } else { l3 = pmap_l2_to_l3(l2, addr); tpte = pmap_load(l3); if ((tpte & PTE_V) == 0) { PMAP_UNLOCK(pmap); return (0); } pa = PTE_TO_PHYS(tpte) | (addr & L3_OFFSET); val = MINCORE_INCORE; } if ((tpte & PTE_D) != 0) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((tpte & PTE_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; managed = (tpte & PTE_SW_MANAGED) == PTE_SW_MANAGED; } else { managed = false; val = 0; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { *pap = pa; } PMAP_UNLOCK(pmap); return (val); } void pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; u_int hart; oldpmap = PCPU_GET(curpmap); pmap = vmspace_pmap(td->td_proc->p_vmspace); if (pmap == oldpmap) return; load_satp(pmap->pm_satp); hart = PCPU_GET(hart); #ifdef SMP CPU_SET_ATOMIC(hart, &pmap->pm_active); CPU_CLR_ATOMIC(hart, &oldpmap->pm_active); #else CPU_SET(hart, &pmap->pm_active); CPU_CLR(hart, &oldpmap->pm_active); #endif PCPU_SET(curpmap, pmap); sfence_vma(); } void pmap_activate(struct thread *td) { critical_enter(); pmap_activate_sw(td); critical_exit(); } void pmap_activate_boot(pmap_t pmap) { u_int hart; hart = PCPU_GET(hart); #ifdef SMP CPU_SET_ATOMIC(hart, &pmap->pm_active); #else CPU_SET(hart, &pmap->pm_active); #endif PCPU_SET(curpmap, pmap); } void pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) { cpuset_t mask; /* * From the RISC-V User-Level ISA V2.2: * * "To make a store to instruction memory visible to all * RISC-V harts, the writing hart has to execute a data FENCE * before requesting that all remote RISC-V harts execute a * FENCE.I." * * However, this is slightly misleading; we still need to * perform a FENCE.I for the local hart, as FENCE does nothing * for its icache. FENCE.I alone is also sufficient for the * local hart. */ sched_pin(); mask = all_harts; CPU_CLR(PCPU_GET(hart), &mask); fence_i(); if (!CPU_EMPTY(&mask) && smp_started) { fence(); sbi_remote_fence_i(mask.__bits); } sched_unpin(); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < L2_SIZE) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & L2_OFFSET; if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || (*addr & L2_OFFSET) == superpage_offset) return; if ((*addr & L2_OFFSET) < superpage_offset) *addr = (*addr & ~L2_OFFSET) + superpage_offset; else *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); } } } boolean_t pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) { return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_BACK); } bool pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2, pt_entry_t **l3) { pd_entry_t *l1p, *l2p; /* Get l1 directory entry. */ l1p = pmap_l1(pmap, va); *l1 = l1p; if (l1p == NULL || (pmap_load(l1p) & PTE_V) == 0) return (false); if ((pmap_load(l1p) & PTE_RX) != 0) { *l2 = NULL; *l3 = NULL; return (true); } /* Get l2 directory entry. */ l2p = pmap_l1_to_l2(l1p, va); *l2 = l2p; if (l2p == NULL || (pmap_load(l2p) & PTE_V) == 0) return (false); if ((pmap_load(l2p) & PTE_RX) != 0) { *l3 = NULL; return (true); } /* Get l3 page table entry. */ *l3 = pmap_l2_to_l3(l2p, va); return (true); } /* * Track a range of the kernel's virtual address space that is contiguous * in various mapping attributes. */ struct pmap_kernel_map_range { vm_offset_t sva; pt_entry_t attrs; int l3pages; int l2pages; int l1pages; }; static void sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range, vm_offset_t eva) { if (eva <= range->sva) return; sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %d %d %d\n", range->sva, eva, (range->attrs & PTE_W) == PTE_W ? 'w' : '-', (range->attrs & PTE_X) == PTE_X ? 'x' : '-', (range->attrs & PTE_U) == PTE_U ? 'u' : 's', (range->attrs & PTE_G) == PTE_G ? 'g' : '-', range->l1pages, range->l2pages, range->l3pages); /* Reset to sentinel value. */ range->sva = 0xfffffffffffffffful; } /* * Determine whether the attributes specified by a page table entry match those * being tracked by the current range. */ static bool sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs) { return (range->attrs == attrs); } static void sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va, pt_entry_t attrs) { memset(range, 0, sizeof(*range)); range->sva = va; range->attrs = attrs; } /* * Given a leaf PTE, derive the mapping's attributes. If they do not match * those of the current run, dump the address range and its attributes, and * begin a new run. */ static void sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range, vm_offset_t va, pd_entry_t l1e, pd_entry_t l2e, pt_entry_t l3e) { pt_entry_t attrs; /* The PTE global bit is inherited by lower levels. */ attrs = l1e & PTE_G; if ((l1e & PTE_RWX) != 0) attrs |= l1e & (PTE_RWX | PTE_U); else if (l2e != 0) attrs |= l2e & PTE_G; if ((l2e & PTE_RWX) != 0) attrs |= l2e & (PTE_RWX | PTE_U); else if (l3e != 0) attrs |= l3e & (PTE_RWX | PTE_U | PTE_G); if (range->sva > va || !sysctl_kmaps_match(range, attrs)) { sysctl_kmaps_dump(sb, range, va); sysctl_kmaps_reinit(range, va, attrs); } } static int sysctl_kmaps(SYSCTL_HANDLER_ARGS) { struct pmap_kernel_map_range range; struct sbuf sbuf, *sb; pd_entry_t l1e, *l2, l2e; pt_entry_t *l3, l3e; vm_offset_t sva; vm_paddr_t pa; int error, i, j, k; error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); sb = &sbuf; sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req); /* Sentinel value. */ range.sva = 0xfffffffffffffffful; /* * Iterate over the kernel page tables without holding the kernel pmap * lock. Kernel page table pages are never freed, so at worst we will * observe inconsistencies in the output. */ sva = VM_MIN_KERNEL_ADDRESS; for (i = pmap_l1_index(sva); i < Ln_ENTRIES; i++) { if (i == pmap_l1_index(DMAP_MIN_ADDRESS)) sbuf_printf(sb, "\nDirect map:\n"); else if (i == pmap_l1_index(VM_MIN_KERNEL_ADDRESS)) sbuf_printf(sb, "\nKernel map:\n"); l1e = kernel_pmap->pm_l1[i]; if ((l1e & PTE_V) == 0) { sysctl_kmaps_dump(sb, &range, sva); sva += L1_SIZE; continue; } if ((l1e & PTE_RWX) != 0) { sysctl_kmaps_check(sb, &range, sva, l1e, 0, 0); range.l1pages++; sva += L1_SIZE; continue; } pa = PTE_TO_PHYS(l1e); l2 = (pd_entry_t *)PHYS_TO_DMAP(pa); for (j = pmap_l2_index(sva); j < Ln_ENTRIES; j++) { l2e = l2[j]; if ((l2e & PTE_V) == 0) { sysctl_kmaps_dump(sb, &range, sva); sva += L2_SIZE; continue; } if ((l2e & PTE_RWX) != 0) { sysctl_kmaps_check(sb, &range, sva, l1e, l2e, 0); range.l2pages++; sva += L2_SIZE; continue; } pa = PTE_TO_PHYS(l2e); l3 = (pd_entry_t *)PHYS_TO_DMAP(pa); for (k = pmap_l3_index(sva); k < Ln_ENTRIES; k++, sva += L3_SIZE) { l3e = l3[k]; if ((l3e & PTE_V) == 0) { sysctl_kmaps_dump(sb, &range, sva); continue; } sysctl_kmaps_check(sb, &range, sva, l1e, l2e, l3e); range.l3pages++; } } } error = sbuf_finish(sb); sbuf_delete(sb); return (error); } SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_kmaps, "A", "Dump kernel address layout"); Index: head/sys/riscv/riscv/sbi.c =================================================================== --- head/sys/riscv/riscv/sbi.c (revision 365454) +++ head/sys/riscv/riscv/sbi.c (revision 365455) @@ -1,207 +1,207 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 Mitchell Horne * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include /* SBI Implementation-Specific Definitions */ #define OPENSBI_VERSION_MAJOR_OFFSET 16 #define OPENSBI_VERSION_MINOR_MASK 0xFFFF u_long sbi_spec_version; u_long sbi_impl_id; u_long sbi_impl_version; static struct sbi_ret sbi_get_spec_version(void) { return (SBI_CALL0(SBI_EXT_ID_BASE, SBI_BASE_GET_SPEC_VERSION)); } static struct sbi_ret sbi_get_impl_id(void) { return (SBI_CALL0(SBI_EXT_ID_BASE, SBI_BASE_GET_IMPL_ID)); } static struct sbi_ret sbi_get_impl_version(void) { return (SBI_CALL0(SBI_EXT_ID_BASE, SBI_BASE_GET_IMPL_VERSION)); } static struct sbi_ret sbi_get_mvendorid(void) { return (SBI_CALL0(SBI_EXT_ID_BASE, SBI_BASE_GET_MVENDORID)); } static struct sbi_ret sbi_get_marchid(void) { return (SBI_CALL0(SBI_EXT_ID_BASE, SBI_BASE_GET_MARCHID)); } static struct sbi_ret sbi_get_mimpid(void) { return (SBI_CALL0(SBI_EXT_ID_BASE, SBI_BASE_GET_MIMPID)); } static void sbi_shutdown_final(void *dummy __unused, int howto) { if ((howto & RB_POWEROFF) != 0) sbi_shutdown(); } void sbi_print_version(void) { u_int major; u_int minor; /* For legacy SBI implementations. */ if (sbi_spec_version == 0) { printf("SBI: Unknown (Legacy) Implementation\n"); printf("SBI Specification Version: 0.1\n"); return; } switch (sbi_impl_id) { case (SBI_IMPL_ID_BBL): - printf("SBI: Berkely Boot Loader %u\n", sbi_impl_version); + printf("SBI: Berkely Boot Loader %lu\n", sbi_impl_version); break; case (SBI_IMPL_ID_OPENSBI): major = sbi_impl_version >> OPENSBI_VERSION_MAJOR_OFFSET; minor = sbi_impl_version & OPENSBI_VERSION_MINOR_MASK; printf("SBI: OpenSBI v%u.%u\n", major, minor); break; default: - printf("SBI: Unrecognized Implementation: %u\n", sbi_impl_id); + printf("SBI: Unrecognized Implementation: %lu\n", sbi_impl_id); break; } major = (sbi_spec_version & SBI_SPEC_VERS_MAJOR_MASK) >> SBI_SPEC_VERS_MAJOR_OFFSET; minor = (sbi_spec_version & SBI_SPEC_VERS_MINOR_MASK); printf("SBI Specification Version: %u.%u\n", major, minor); } int sbi_hsm_hart_start(u_long hart, u_long start_addr, u_long priv) { struct sbi_ret ret; ret = SBI_CALL3(SBI_EXT_ID_HSM, SBI_HSM_HART_START, hart, start_addr, priv); return (ret.error != 0 ? (int)ret.error : 0); } void sbi_hsm_hart_stop(void) { (void)SBI_CALL0(SBI_EXT_ID_HSM, SBI_HSM_HART_STOP); } int sbi_hsm_hart_status(u_long hart) { struct sbi_ret ret; ret = SBI_CALL1(SBI_EXT_ID_HSM, SBI_HSM_HART_STATUS, hart); return (ret.error != 0 ? (int)ret.error : (int)ret.value); } void sbi_init(void) { struct sbi_ret sret; /* * Get the spec version. For legacy SBI implementations this will * return an error, otherwise it is guaranteed to succeed. */ sret = sbi_get_spec_version(); if (sret.error != 0) { /* We are running a legacy SBI implementation. */ sbi_spec_version = 0; return; } /* Set the SBI implementation info. */ sbi_spec_version = sret.value; sbi_impl_id = sbi_get_impl_id().value; sbi_impl_version = sbi_get_impl_version().value; /* Set the hardware implementation info. */ mvendorid = sbi_get_mvendorid().value; marchid = sbi_get_marchid().value; mimpid = sbi_get_mimpid().value; /* * Probe for legacy extensions. Currently we rely on all of them * to be implemented, but this is not guaranteed by the spec. */ KASSERT(sbi_probe_extension(SBI_SET_TIMER) != 0, ("SBI doesn't implement sbi_set_timer()")); KASSERT(sbi_probe_extension(SBI_CONSOLE_PUTCHAR) != 0, ("SBI doesn't implement sbi_console_putchar()")); KASSERT(sbi_probe_extension(SBI_CONSOLE_GETCHAR) != 0, ("SBI doesn't implement sbi_console_getchar()")); KASSERT(sbi_probe_extension(SBI_CLEAR_IPI) != 0, ("SBI doesn't implement sbi_clear_ipi()")); KASSERT(sbi_probe_extension(SBI_SEND_IPI) != 0, ("SBI doesn't implement sbi_send_ipi()")); KASSERT(sbi_probe_extension(SBI_REMOTE_FENCE_I) != 0, ("SBI doesn't implement sbi_remote_fence_i()")); KASSERT(sbi_probe_extension(SBI_REMOTE_SFENCE_VMA) != 0, ("SBI doesn't implement sbi_remote_sfence_vma()")); KASSERT(sbi_probe_extension(SBI_REMOTE_SFENCE_VMA_ASID) != 0, ("SBI doesn't implement sbi_remote_sfence_vma_asid()")); KASSERT(sbi_probe_extension(SBI_SHUTDOWN) != 0, ("SBI doesn't implement sbi_shutdown()")); } static void sbi_late_init(void *dummy __unused) { EVENTHANDLER_REGISTER(shutdown_final, sbi_shutdown_final, NULL, SHUTDOWN_PRI_LAST); } SYSINIT(sbi, SI_SUB_KLD, SI_ORDER_ANY, sbi_late_init, NULL); Index: head/sys/riscv/riscv/trap.c =================================================================== --- head/sys/riscv/riscv/trap.c (revision 365454) +++ head/sys/riscv/riscv/trap.c (revision 365455) @@ -1,381 +1,381 @@ /*- * Copyright (c) 2015-2018 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #ifdef FPE #include #endif #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif int (*dtrace_invop_jump_addr)(struct trapframe *); extern register_t fsu_intr_fault; /* Called from exception.S */ void do_trap_supervisor(struct trapframe *); void do_trap_user(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr, int trapno) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; ksi.ksi_trapno = trapno; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td) { struct proc *p; register_t *ap; struct syscall_args *sa; int nap; nap = NARGREG; p = td->td_proc; sa = &td->td_sa; ap = &td->td_frame->tf_a[0]; sa->code = td->td_frame->tf_t[0]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("TODO: Could we have more then %d args?", NARGREG); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void dump_regs(struct trapframe *frame) { int n; int i; n = (sizeof(frame->tf_t) / sizeof(frame->tf_t[0])); for (i = 0; i < n; i++) printf("t[%d] == 0x%016lx\n", i, frame->tf_t[i]); n = (sizeof(frame->tf_s) / sizeof(frame->tf_s[0])); for (i = 0; i < n; i++) printf("s[%d] == 0x%016lx\n", i, frame->tf_s[i]); n = (sizeof(frame->tf_a) / sizeof(frame->tf_a[0])); for (i = 0; i < n; i++) printf("a[%d] == 0x%016lx\n", i, frame->tf_a[i]); printf("ra == 0x%016lx\n", frame->tf_ra); printf("sp == 0x%016lx\n", frame->tf_sp); printf("gp == 0x%016lx\n", frame->tf_gp); printf("tp == 0x%016lx\n", frame->tf_tp); printf("sepc == 0x%016lx\n", frame->tf_sepc); printf("sstatus == 0x%016lx\n", frame->tf_sstatus); } static void svc_handler(struct trapframe *frame) { struct thread *td; td = curthread; td->td_frame = frame; syscallenter(td); syscallret(td); } static void data_abort(struct trapframe *frame, int usermode) { struct vm_map *map; uint64_t stval; struct thread *td; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; struct proc *p; int error, sig, ucode; #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; p = td->td_proc; pcb = td->td_pcb; stval = frame->tf_stval; if (td->td_critnest != 0 || td->td_intr_nesting_level != 0 || WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL, "Kernel page fault") != 0) goto fatal; if (usermode) { map = &td->td_proc->p_vmspace->vm_map; } else { /* * Enable interrupts for the duration of the page fault. For * user faults this was done already in do_trap_user(). */ intr_enable(); if (stval >= VM_MAX_USER_ADDRESS) { map = kernel_map; } else { if (pcb->pcb_onfault == 0) goto fatal; map = &td->td_proc->p_vmspace->vm_map; } } va = trunc_page(stval); if ((frame->tf_scause == EXCP_FAULT_STORE) || (frame->tf_scause == EXCP_STORE_PAGE_FAULT)) { ftype = VM_PROT_WRITE; } else if (frame->tf_scause == EXCP_INST_PAGE_FAULT) { ftype = VM_PROT_EXECUTE; } else { ftype = VM_PROT_READ; } if (pmap_fault_fixup(map->pmap, va, ftype)) goto done; error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { if (usermode) { call_trapsignal(td, sig, ucode, (void *)stval, frame->tf_scause & EXCP_MASK); } else { if (pcb->pcb_onfault != 0) { frame->tf_a[0] = error; frame->tf_sepc = pcb->pcb_onfault; return; } goto fatal; } } done: if (usermode) userret(td, frame); return; fatal: dump_regs(frame); panic("Fatal page fault at %#lx: %#016lx", frame->tf_sepc, stval); } void do_trap_supervisor(struct trapframe *frame) { uint64_t exception; /* Ensure we came from supervisor mode, interrupts disabled */ KASSERT((csr_read(sstatus) & (SSTATUS_SPP | SSTATUS_SIE)) == SSTATUS_SPP, ("Came from S mode with interrupts enabled")); exception = frame->tf_scause & EXCP_MASK; if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR3(KTR_TRAP, "do_trap_supervisor: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch (exception) { case EXCP_FAULT_LOAD: case EXCP_FAULT_STORE: case EXCP_FAULT_FETCH: case EXCP_STORE_PAGE_FAULT: case EXCP_LOAD_PAGE_FAULT: data_abort(frame, 0); break; case EXCP_BREAKPOINT: #ifdef KDTRACE_HOOKS if (dtrace_invop_jump_addr != NULL && dtrace_invop_jump_addr(frame) == 0) break; #endif #ifdef KDB kdb_trap(exception, 0, frame); #else dump_regs(frame); panic("No debugger in kernel.\n"); #endif break; case EXCP_ILLEGAL_INSTRUCTION: dump_regs(frame); panic("Illegal instruction at 0x%016lx\n", frame->tf_sepc); break; default: dump_regs(frame); - panic("Unknown kernel exception %x trap value %lx\n", + panic("Unknown kernel exception %lx trap value %lx\n", exception, frame->tf_stval); } } void do_trap_user(struct trapframe *frame) { uint64_t exception; struct thread *td; struct pcb *pcb; td = curthread; td->td_frame = frame; pcb = td->td_pcb; /* Ensure we came from usermode, interrupts disabled */ KASSERT((csr_read(sstatus) & (SSTATUS_SPP | SSTATUS_SIE)) == 0, ("Came from U mode with interrupts enabled")); exception = frame->tf_scause & EXCP_MASK; if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } intr_enable(); CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch (exception) { case EXCP_FAULT_LOAD: case EXCP_FAULT_STORE: case EXCP_FAULT_FETCH: case EXCP_STORE_PAGE_FAULT: case EXCP_LOAD_PAGE_FAULT: case EXCP_INST_PAGE_FAULT: data_abort(frame, 1); break; case EXCP_USER_ECALL: frame->tf_sepc += 4; /* Next instruction */ svc_handler(frame); break; case EXCP_ILLEGAL_INSTRUCTION: #ifdef FPE if ((pcb->pcb_fpflags & PCB_FP_STARTED) == 0) { /* * May be a FPE trap. Enable FPE usage * for this thread and try again. */ fpe_state_clear(); frame->tf_sstatus &= ~SSTATUS_FS_MASK; frame->tf_sstatus |= SSTATUS_FS_CLEAN; pcb->pcb_fpflags |= PCB_FP_STARTED; break; } #endif call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)frame->tf_sepc, exception); userret(td, frame); break; case EXCP_BREAKPOINT: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_sepc, exception); userret(td, frame); break; default: dump_regs(frame); - panic("Unknown userland exception %x, trap value %lx\n", + panic("Unknown userland exception %lx, trap value %lx\n", exception, frame->tf_stval); } }