Index: head/sys/boot/fdt/dts/riscv/spike.dts
===================================================================
--- head/sys/boot/fdt/dts/riscv/spike.dts	(revision 295971)
+++ head/sys/boot/fdt/dts/riscv/spike.dts	(revision 295972)
@@ -1,92 +1,109 @@
/*-
 * Copyright (c) 2015 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/dts-v1/;

/ {
	model = "UC Berkeley Spike Simulator RV64I";
	compatible = "riscv,rv64i";
	#address-cells = <1>;
	#size-cells = <1>;
	#interrupt-cells = <1>;

+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu@0 {
+			device_type = "cpu";
+			compatible = "riscv,rv64i";
+			reg = <0x40002000>;
+		};
+
+		cpu@1 {
+			device_type = "cpu";
+			compatible = "riscv,rv64i";
+			reg = <0x4000a000>;
+		};
+	};
+
	aliases {
		console0 = &console0;
	};

	memory {
		device_type = "memory";
		reg = <0x0 0x40000000>; /* 1GB at 0x0 */
	};

	soc {
		#address-cells = <2>;
		#size-cells = <2>;
		#interrupt-cells = <1>;
		compatible = "simple-bus";
		ranges;

		pic0: pic@0 {
			compatible = "riscv,pic";
			interrupt-controller;
		};

		timer0: timer@0 {
			compatible = "riscv,timer";
			interrupts = < 1 >;
			interrupt-parent = < &pic0 >;
			clock-frequency = < 1000000 >;
		};

		htif0: htif@0 {
			compatible = "riscv,htif";
			interrupts = < 0 >;
			interrupt-parent = < &pic0 >;

			console0: console@0 {
				compatible = "htif,console";
				status = "okay";
			};
		};
	};

	chosen {
		bootargs = "-v";
		stdin = "console0";
		stdout = "console0";
	};
};

Index: head/sys/conf/files.riscv
===================================================================
--- head/sys/conf/files.riscv	(revision 295971)
+++ head/sys/conf/files.riscv	(revision 295972)
@@ -1,45 +1,47 @@
# $FreeBSD$
crypto/blowfish/bf_enc.c	optional	crypto | ipsec
crypto/des/des_enc.c		optional	crypto | ipsec | netsmb
+dev/ofw/ofw_cpu.c		optional	fdt
kern/kern_clocksource.c		standard
kern/subr_dummy_vdso_tc.c	standard
libkern/bcmp.c			standard
libkern/ffs.c			standard
libkern/ffsl.c			standard
libkern/fls.c			standard
libkern/flsl.c			standard
libkern/flsll.c			standard
libkern/memmove.c		standard
libkern/memset.c		standard
riscv/htif/htif.c		standard
riscv/htif/htif_block.c		standard
riscv/htif/htif_console.c	standard
riscv/riscv/autoconf.c		standard
riscv/riscv/bcopy.c		standard
riscv/riscv/bus_machdep.c	standard
riscv/riscv/busdma_machdep.c	standard
riscv/riscv/clock.c		standard
riscv/riscv/copyinout.S		standard
riscv/riscv/copystr.c		standard
riscv/riscv/cpufunc_asm.S	standard
riscv/riscv/devmap.c		standard
riscv/riscv/dump_machdep.c	standard
riscv/riscv/elf_machdep.c	standard
riscv/riscv/intr_machdep.c	standard
riscv/riscv/in_cksum.c		optional	inet | inet6
riscv/riscv/identcpu.c		standard
riscv/riscv/locore.S		standard	no-obj
-riscv/riscv/minidump_machdep.c	standard
riscv/riscv/machdep.c		standard
+riscv/riscv/minidump_machdep.c	standard
+riscv/riscv/mp_machdep.c	optional	smp
riscv/riscv/mem.c		standard
riscv/riscv/nexus.c		standard
riscv/riscv/pmap.c		standard
riscv/riscv/stack_machdep.c	optional	ddb | stack
riscv/riscv/support.S		standard
riscv/riscv/swtch.S		standard
riscv/riscv/sys_machdep.c	standard
riscv/riscv/trap.c		standard
riscv/riscv/timer.c		standard
riscv/riscv/uio_machdep.c	standard
riscv/riscv/uma_machdep.c	standard
riscv/riscv/vm_machdep.c	standard

Index: head/sys/riscv/conf/GENERIC
===================================================================
--- head/sys/riscv/conf/GENERIC	(revision 295971)
+++ head/sys/riscv/conf/GENERIC	(revision 295972)
@@ -1,101 +1,107 @@
#
# GENERIC -- Generic kernel configuration file for FreeBSD/RISC-V
#
# For more information on this file, please read the config(5) manual page,
# and/or the handbook section on Kernel Configuration Files:
#
#    http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
#
# The handbook is also available locally in /usr/share/doc/handbook
# if you've installed the doc distribution, otherwise always see the
# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
# latest information.
#
# An exhaustive list of options and more detailed explanations of the
# device lines is also present in the ../../conf/NOTES and NOTES files.
# If you are in doubt as to the purpose or necessity of a line, check first
# in NOTES.
#
# $FreeBSD$

cpu		RISCV
ident		GENERIC

makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
# makeoptions	WITH_CTF=1		# Run ctfconvert(1) for DTrace support
makeoptions	NO_MODULES=1		# We don't yet support modules on RISC-V

options 	SCHED_ULE		# ULE scheduler
options 	PREEMPTION		# Enable kernel thread preemption
options 	INET			# InterNETworking
options 	INET6			# IPv6 communications protocols
options 	IPSEC			# IP (v4/v6) security
options 	TCP_OFFLOAD		# TCP offload
options 	SCTP			# Stream Control Transmission Protocol
options 	FFS			# Berkeley Fast Filesystem
options 	SOFTUPDATES		# Enable FFS soft updates support
options 	UFS_ACL			# Support for access control lists
options 	UFS_DIRHASH		# Improve performance on big directories
options 	UFS_GJOURNAL		# Enable gjournal-based UFS journaling
options 	QUOTA			# Enable disk quotas for UFS
options 	MD_ROOT			# MD is a potential root device
options 	NFSCL			# Network Filesystem Client
options 	NFSD			# Network Filesystem Server
options 	NFSLOCKD		# Network Lock Manager
options 	NFS_ROOT		# NFS usable as /, requires NFSCL
options 	MSDOSFS			# MSDOS Filesystem
options 	CD9660			# ISO 9660 Filesystem
options 	PROCFS			# Process filesystem (requires PSEUDOFS)
options 	PSEUDOFS		# Pseudo-filesystem framework
options 	GEOM_PART_GPT		# GUID Partition Tables.
# options 	GEOM_RAID		# Soft RAID functionality.
options 	GEOM_LABEL		# Provides labelization
options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
options 	KTRACE			# ktrace(1) support
# options 	STACK			# stack(9) support
options 	SYSVSHM			# SYSV-style shared memory
options 	SYSVMSG			# SYSV-style message queues
options 	SYSVSEM			# SYSV-style semaphores
options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
options 	PRINTF_BUFR_SIZE=128	# Prevent printf output being interspersed.
options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
# options 	HWPMC_HOOKS		# Necessary kernel hooks for hwpmc(4)
options 	AUDIT			# Security event auditing
options 	CAPABILITY_MODE		# Capsicum capability mode
options 	CAPABILITIES		# Capsicum capabilities
options 	MAC			# TrustedBSD MAC Framework
# options 	KDTRACE_FRAME		# Ensure frames are compiled in
# options 	KDTRACE_HOOKS		# Kernel DTrace hooks
# options 	VFP			# Floating-point support
options 	RACCT			# Resource accounting framework
options 	RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default
options 	RCTL			# Resource limits
-# options 	SMP
+options 	SMP
+
+# Uncomment for memory disk
+# options 	MD_ROOT
+# options 	MD_ROOT_SIZE=8192	# 8MB ram disk
+# makeoptions	MFS_IMAGE=/path/to/img
+# options 	ROOTDEVNAME=\"ufs:/dev/md0\"

# Debugging support. Always need this:
# options 	KDB			# Enable kernel debugger support.
# options 	KDB_TRACE		# Print a stack trace for a panic.
# For full debugger support use (turn off in stable branch):
# options 	DDB			# Support DDB.
# options 	GDB			# Support remote GDB.
options 	DEADLKRES		# Enable the deadlock resolver
options 	INVARIANTS		# Enable calls of extra sanity checking
options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
# options 	WITNESS			# Enable checks to detect deadlocks and cycles
# options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
options 	MALLOC_DEBUG_MAXZONES=8	# Separate malloc(9) zones

options 	ROOTDEVNAME=\"ufs:/dev/htif_blk0\"

# options 	EARLY_PRINTF

# Pseudo devices.
device		loop			# Network loopback
device		random			# Entropy device
device		ether			# Ethernet support
device		vlan			# 802.1Q VLAN support
device		tun			# Packet tunnel.
device		md			# Memory "disks"
device		gif			# IPv6 and IPv4 tunneling
device		firmware		# firmware assist module

options 	FDT

Index: head/sys/riscv/htif/htif.c
===================================================================
--- head/sys/riscv/htif/htif.c	(revision 295971)
+++ head/sys/riscv/htif/htif.c	(revision 295972)
@@ -1,283 +1,283 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "htif.h"

static struct resource_spec htif_spec[] = {
-	{ SYS_RES_IRQ,		0,	RF_ACTIVE },
+	{ SYS_RES_IRQ,		0,	RF_ACTIVE | RF_SHAREABLE},
	{ -1, 0 }
};

struct intr_entry {
	void (*func) (void *, uint64_t);
	void *arg;
};

struct intr_entry intrs[HTIF_NDEV];

uint64_t
htif_command(uint64_t arg)
{

	return (machine_command(ECALL_HTIF_CMD, arg));
}

int
htif_setup_intr(int id, void *func, void *arg)
{

	if (id >= HTIF_NDEV)
		return (-1);

	intrs[id].func = func;
	intrs[id].arg = arg;

	return (0);
}

static void
htif_handle_entry(struct htif_softc *sc)
{
	uint64_t entry;
	uint8_t devcmd;
	uint8_t devid;

	entry = machine_command(ECALL_HTIF_GET_ENTRY, 0);
	while (entry) {
		devid = HTIF_DEV_ID(entry);
		devcmd = HTIF_DEV_CMD(entry);

		if (devcmd == HTIF_CMD_IDENTIFY) {
			/* Enumeration interrupt */
			if (devid == sc->identify_id)
				sc->identify_done = 1;
		} else {
			/* Device interrupt */
			if (intrs[devid].func != NULL)
				intrs[devid].func(intrs[devid].arg, entry);
		}

		entry = machine_command(ECALL_HTIF_GET_ENTRY, 0);
	}
}

static int
htif_intr(void *arg)
{
	struct htif_softc *sc;

	sc = arg;

-	htif_handle_entry(sc);
+	csr_clear(sip, SIP_SSIP);

-	csr_clear(sip, SIE_SSIE);
+	htif_handle_entry(sc);

	return (FILTER_HANDLED);
}

static int
htif_add_device(struct htif_softc *sc, int i, char *id, char *name)
{
	struct htif_dev_ivars *di;

	di = malloc(sizeof(struct htif_dev_ivars), M_DEVBUF, M_WAITOK | M_ZERO);
	di->sc = sc;
	di->index = i;
	di->id = malloc(HTIF_ID_LEN, M_DEVBUF, M_WAITOK | M_ZERO);
	memcpy(di->id, id, HTIF_ID_LEN);

	di->dev = device_add_child(sc->dev, name, -1);
	device_set_ivars(di->dev, di);

	return (0);
}

static int
htif_enumerate(struct htif_softc *sc)
{
	char id[HTIF_ID_LEN] __aligned(HTIF_ALIGN);
	uint64_t paddr;
	uint64_t data;
	uint64_t cmd;
	int len;
	int i;

	device_printf(sc->dev, "Enumerating devices\n");

	for (i = 0; i < HTIF_NDEV; i++) {
		paddr = pmap_kextract((vm_offset_t)&id);
		data = (paddr << IDENTIFY_PADDR_SHIFT);
		data |= IDENTIFY_IDENT;

		sc->identify_id = i;
		sc->identify_done = 0;

		cmd = i;
		cmd <<= HTIF_DEV_ID_SHIFT;
		cmd |= (HTIF_CMD_IDENTIFY << HTIF_CMD_SHIFT);
		cmd |= data;

		htif_command(cmd);

		/* Do poll as interrupts are disabled yet */
		while (sc->identify_done == 0) {
			htif_handle_entry(sc);
		}

		len = strnlen(id, sizeof(id));
		if (len <= 0)
			break;

		if (bootverbose)
			printf(" %d %s\n", i, id);

		if (strncmp(id, "disk", 4) == 0)
			htif_add_device(sc, i, id, "htif_blk");
		else if (strncmp(id, "bcd", 3) == 0)
			htif_add_device(sc, i, id, "htif_console");
		else if (strncmp(id, "syscall_proxy", 13) == 0)
			htif_add_device(sc, i, id, "htif_syscall_proxy");
	}

	return (bus_generic_attach(sc->dev));
}

int
htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct htif_dev_ivars *ivars;

	ivars = device_get_ivars(child);

	switch (which) {
	case HTIF_IVAR_INDEX:
		*result = ivars->index;
		break;
	case HTIF_IVAR_ID:
		*result = (uintptr_t)ivars->id;
	default:
		return (EINVAL);
	}

	return (0);
}

static int
htif_probe(device_t dev)
{

	if (!ofw_bus_status_okay(dev))
		return (ENXIO);

	if (!ofw_bus_is_compatible(dev, "riscv,htif"))
		return (ENXIO);

	device_set_desc(dev, "HTIF bus device");
	return (BUS_PROBE_DEFAULT);
}

static int
htif_attach(device_t dev)
{
	struct htif_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->dev = dev;

	if (bus_alloc_resources(dev, htif_spec, sc->res)) {
		device_printf(dev, "could not allocate resources\n");
		return (ENXIO);
	}

	/* Setup IRQs handler */
	error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK,
	    htif_intr, NULL, sc, &sc->ihl[0]);
	if (error) {
		device_printf(dev, "Unable to alloc int resource.\n");
		return (ENXIO);
	}

	csr_set(sie, SIE_SSIE);

	return (htif_enumerate(sc));
}

static device_method_t htif_methods[] = {
	DEVMETHOD(device_probe,		htif_probe),
	DEVMETHOD(device_attach,	htif_attach),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,	htif_read_ivar),

	DEVMETHOD_END
};

static driver_t htif_driver = {
	"htif",
	htif_methods,
	sizeof(struct htif_softc)
};

static devclass_t htif_devclass;

DRIVER_MODULE(htif, simplebus, htif_driver, htif_devclass, 0, 0);

Index: head/sys/riscv/htif/htif_block.c
===================================================================
--- head/sys/riscv/htif/htif_block.c	(revision 295971)
+++ head/sys/riscv/htif/htif_block.c	(revision 295972)
@@ -1,289 +1,293 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "htif.h"

#define	SECTOR_SIZE_SHIFT	(9)
#define	SECTOR_SIZE		(1 << SECTOR_SIZE_SHIFT)

#define	HTIF_BLK_LOCK(_sc)	mtx_lock(&(_sc)->sc_mtx)
#define	HTIF_BLK_UNLOCK(_sc)	mtx_unlock(&(_sc)->sc_mtx)
#define	HTIF_BLK_LOCK_INIT(_sc) \
	mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \
	    "htif_blk", MTX_DEF)
#define	HTIF_BLK_LOCK_DESTROY(_sc)	mtx_destroy(&_sc->sc_mtx);
#define	HTIF_BLK_ASSERT_LOCKED(_sc)	mtx_assert(&_sc->sc_mtx, MA_OWNED);
#define	HTIF_BLK_ASSERT_UNLOCKED(_sc)	mtx_assert(&_sc->sc_mtx, MA_NOTOWNED);

static void htif_blk_task(void *arg);

static disk_open_t	htif_blk_open;
static disk_close_t	htif_blk_close;
static disk_strategy_t	htif_blk_strategy;

struct htif_blk_softc {
	device_t	dev;
	struct disk	*disk;
	struct mtx	htif_io_mtx;
	struct mtx	sc_mtx;
	struct proc	*p;
	struct bio_queue_head bio_queue;
	int		running;
	int		intr_chan;
	int		cmd_done;
	int		index;
	uint16_t	curtag;
};

struct htif_blk_request {
	uint64_t addr;
	uint64_t offset;	/* offset in bytes */
	uint64_t size;		/* length in bytes */
	uint64_t tag;
};

static void
htif_blk_intr(void *arg, uint64_t entry)
{
	struct htif_blk_softc *sc;
	uint64_t devcmd;
	uint64_t data;

	sc = arg;

	devcmd = HTIF_DEV_CMD(entry);
	data = HTIF_DEV_DATA(entry);

	if (sc->curtag == data) {
		sc->cmd_done = 1;
		wakeup(&sc->intr_chan);
+	} else {
+		device_printf(sc->dev, "Unexpected tag %d (should be %d)\n",
+		    data, sc->curtag);
	}
}

static int
htif_blk_probe(device_t dev)
{

	return (0);
}

static int
htif_blk_attach(device_t dev)
{
	struct htif_blk_softc *sc;
	char prefix[] = " size=";
	char *str;
	long size;

	sc = device_get_softc(dev);
	sc->dev = dev;

	mtx_init(&sc->htif_io_mtx, device_get_nameunit(dev), "htif_blk", MTX_DEF);
	HTIF_BLK_LOCK_INIT(sc);

	str = strstr(htif_get_id(dev), prefix);

	size = strtol((str + 6), NULL, 10);
	if (size == 0) {
		return (ENXIO);
	}

	sc->index = htif_get_index(dev);
	if (sc->index < 0)
		return (EINVAL);
	htif_setup_intr(sc->index, htif_blk_intr, sc);

	sc->disk = disk_alloc();
	sc->disk->d_drv1 = sc;

	sc->disk->d_maxsize = 4096; /* Max transfer */
	sc->disk->d_name = "htif_blk";
	sc->disk->d_open = htif_blk_open;
	sc->disk->d_close = htif_blk_close;
	sc->disk->d_strategy = htif_blk_strategy;
	sc->disk->d_unit = 0;
	sc->disk->d_sectorsize = SECTOR_SIZE;
	sc->disk->d_mediasize = size;
	disk_create(sc->disk, DISK_VERSION);

	bioq_init(&sc->bio_queue);

	sc->running = 1;

	kproc_create(&htif_blk_task, sc, &sc->p, 0, 0, "%s: transfer",
	    device_get_nameunit(dev));

	return (0);
}

static int
htif_blk_open(struct disk *dp)
{

	return (0);
}

static int
htif_blk_close(struct disk *dp)
{

	return (0);
}

static void
htif_blk_task(void *arg)
{
	struct htif_blk_request req __aligned(HTIF_ALIGN);
	struct htif_blk_softc *sc;
	struct bio *bp;
	uint64_t paddr;
	uint64_t cmd;
	int i;

	sc = (struct htif_blk_softc *)arg;

	while (1) {
		HTIF_BLK_LOCK(sc);
		do {
			bp = bioq_takefirst(&sc->bio_queue);
			if (bp == NULL)
				msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0);
		} while (bp == NULL);
		HTIF_BLK_UNLOCK(sc);

		if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
+			HTIF_BLK_LOCK(sc);
+
			req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize);
			req.size = bp->bio_bcount;
			paddr = vtophys(bp->bio_data);
			KASSERT(paddr != 0, ("paddr is 0"));
			req.addr = paddr;
			req.tag = sc->curtag;

			cmd = sc->index;
			cmd <<= HTIF_DEV_ID_SHIFT;
			if (bp->bio_cmd == BIO_READ)
				cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT);
			else
				cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT);
			paddr = vtophys(&req);
			KASSERT(paddr != 0, ("paddr is 0"));
			cmd |= paddr;

			sc->cmd_done = 0;
			htif_command(cmd);

			/* Wait for interrupt */
-			HTIF_BLK_LOCK(sc);
			i = 0;
			while (sc->cmd_done == 0) {
				msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2);

				if (i++ > 2) {
					/* TODO: try to re-issue operation on timeout ? */
					bp->bio_error = EIO;
					bp->bio_flags |= BIO_ERROR;
					disk_err(bp, "hard error", -1, 1);
					break;
				}
			}
			HTIF_BLK_UNLOCK(sc);

			biodone(bp);
		} else {
			printf("unknown op %d\n", bp->bio_cmd);
		}
	}
}

static void
htif_blk_strategy(struct bio *bp)
{
	struct htif_blk_softc *sc;

	sc = bp->bio_disk->d_drv1;

	HTIF_BLK_LOCK(sc);
	if (sc->running > 0) {
		bioq_disksort(&sc->bio_queue, bp);
		HTIF_BLK_UNLOCK(sc);
		wakeup(sc);
	} else {
		HTIF_BLK_UNLOCK(sc);
		biofinish(bp, NULL, ENXIO);
	}
}

static device_method_t htif_blk_methods[] = {
	DEVMETHOD(device_probe,		htif_blk_probe),
	DEVMETHOD(device_attach,	htif_blk_attach),
};

static driver_t htif_blk_driver = {
	"htif_blk",
	htif_blk_methods,
	sizeof(struct htif_blk_softc)
};

static devclass_t	htif_blk_devclass;

DRIVER_MODULE(htif_blk, htif, htif_blk_driver, htif_blk_devclass, 0, 0);

Index: head/sys/riscv/htif/htif_console.c
===================================================================
--- head/sys/riscv/htif/htif_console.c	(revision 295971)
+++ head/sys/riscv/htif/htif_console.c	(revision 295972)
@@ -1,361 +1,369 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "htif.h"

#include
#include

extern uint64_t console_intr;

static tsw_outwakeup_t riscvtty_outwakeup;

static struct ttydevsw riscv_ttydevsw = {
	.tsw_flags	= TF_NOPREFIX,
	.tsw_outwakeup	= riscvtty_outwakeup,
};

static int			polltime;
static struct callout		riscv_callout;
static struct tty		*tp = NULL;

#if defined(KDB)
static int			alt_break_state;
#endif

static void	riscv_timeout(void *);

static cn_probe_t	riscv_cnprobe;
static cn_init_t	riscv_cninit;
static cn_term_t	riscv_cnterm;
static cn_getc_t	riscv_cngetc;
static cn_putc_t	riscv_cnputc;
static cn_grab_t	riscv_cngrab;
static cn_ungrab_t	riscv_cnungrab;

CONSOLE_DRIVER(riscv);

#define	MAX_BURST_LEN		1
#define	QUEUE_SIZE		256
#define	CONSOLE_DEFAULT_ID	1ul
+#define	SPIN_IN_MACHINE_MODE	1

struct queue_entry {
	uint64_t data;
	uint64_t used;
	struct queue_entry *next;
};

struct queue_entry cnqueue[QUEUE_SIZE];
struct queue_entry *entry_last;
struct queue_entry *entry_served;

static void
htif_putc(int c)
{
	uint64_t cmd;

	cmd = (HTIF_CMD_WRITE << HTIF_CMD_SHIFT);
	cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT);
	cmd |= c;

+#ifdef SPIN_IN_MACHINE_MODE
+	machine_command(ECALL_HTIF_LOWPUTC, cmd);
+#else
	htif_command(cmd);
+#endif
+
}

static uint8_t
htif_getc(void)
{
	uint64_t cmd;
	uint8_t res;

	cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT);
	cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT);

	res = htif_command(cmd);

	return (res);
}

static void
riscv_putc(int c)
{
	uint64_t counter;
	uint64_t *cc;
	uint64_t val;

	val = 0;
	counter = 0;

	cc = (uint64_t*)&console_intr;
	*cc = 0;

	htif_putc(c);

+#ifndef SPIN_IN_MACHINE_MODE
	/* Wait for an interrupt */
	__asm __volatile(
		"li %0, 1\n"		/* counter = 1 */
		"slli %0, %0, 12\n"	/* counter <<= 12 */
	"1:"
		"addi %0, %0, -1\n"	/* counter -= 1 */
		"beqz %0, 2f\n"		/* counter == 0 ? finish */
		"ld %1, 0(%2)\n"	/* val = *cc */
		"beqz %1, 1b\n"		/* val == 0 ? repeat */
	"2:"
		: "=&r"(counter), "=&r"(val) : "r"(cc)
	);
+#endif
}

#ifdef EARLY_PRINTF
early_putc_t *early_putc = riscv_putc;
#endif

static void
cn_drvinit(void *unused)
{

	if (riscv_consdev.cn_pri != CN_DEAD &&
	    riscv_consdev.cn_name[0] != '\0') {
		tp = tty_alloc(&riscv_ttydevsw, NULL);
		tty_init_console(tp, 0);
		tty_makedev(tp, NULL, "%s", "rcons");

		polltime = 1;

		callout_init(&riscv_callout, 1);
		callout_reset(&riscv_callout, polltime, riscv_timeout, NULL);
	}
}

SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL);

static void
riscvtty_outwakeup(struct tty *tp)
{
	u_char buf[MAX_BURST_LEN];
	int len;
	int i;

	for (;;) {
		len = ttydisc_getc(tp, buf, sizeof(buf));
		if (len == 0)
			break;

		KASSERT(len == 1, ("tty error"));

		for (i = 0; i < len; i++)
			riscv_putc(buf[i]);
	}
}

static void
riscv_timeout(void *v)
{
	int c;

	tty_lock(tp);
	while ((c = riscv_cngetc(NULL)) != -1)
		ttydisc_rint(tp, c, 0);
	ttydisc_rint_done(tp);
	tty_unlock(tp);

	callout_reset(&riscv_callout, polltime, riscv_timeout, NULL);
}

static void
riscv_cnprobe(struct consdev *cp)
{

	cp->cn_pri = CN_NORMAL;
}

static void
riscv_cninit(struct consdev *cp)
{
	int i;

	strcpy(cp->cn_name, "rcons");

	for (i = 0; i < QUEUE_SIZE; i++) {
		if (i == (QUEUE_SIZE - 1))
			cnqueue[i].next = &cnqueue[0];
		else
			cnqueue[i].next = &cnqueue[i+1];
		cnqueue[i].data = 0;
		cnqueue[i].used = 0;
	}

	entry_last = &cnqueue[0];
	entry_served = &cnqueue[0];
}

static void
riscv_cnterm(struct consdev *cp)
{

}

static void
riscv_cngrab(struct consdev *cp)
{

}

static void
riscv_cnungrab(struct consdev *cp)
{

}

static int
riscv_cngetc(struct consdev *cp)
{
	uint8_t data;
	int ch;

	ch = htif_getc();

	if (entry_served->used == 1) {
		data = entry_served->data;
		entry_served->used = 0;
		entry_served = entry_served->next;
		ch = (data & 0xff);
		if (ch > 0 && ch < 0xff) {
#if defined(KDB)
			kdb_alt_break(ch, &alt_break_state);
#endif
			return (ch);
		}
	}

	return (-1);
}

static void
riscv_cnputc(struct consdev *cp, int c)
{

	riscv_putc(c);
}

/*
 * Bus interface.
 */

struct htif_console_softc {
	device_t	dev;
	int		running;
	int		intr_chan;
	int		cmd_done;
	int		curtag;
	int		index;
};

static void
htif_console_intr(void *arg, uint64_t entry)
{
	struct htif_console_softc *sc;
	uint8_t devcmd;
	uint64_t data;

	sc = arg;

	devcmd = HTIF_DEV_CMD(entry);
	data = HTIF_DEV_DATA(entry);

	if (devcmd == 0) {
		entry_last->data = data;
		entry_last->used = 1;
		entry_last = entry_last->next;
	}
}

static int
htif_console_probe(device_t dev)
{

	return (0);
}

static int
htif_console_attach(device_t dev)
{
	struct htif_console_softc *sc;

	sc = device_get_softc(dev);
	sc->dev = dev;

	sc->index = htif_get_index(dev);
	if (sc->index < 0)
		return (EINVAL);

	htif_setup_intr(sc->index, htif_console_intr, sc);

	return (0);
}

static device_method_t htif_console_methods[] = {
	DEVMETHOD(device_probe,		htif_console_probe),
	DEVMETHOD(device_attach,	htif_console_attach),
	DEVMETHOD_END
};

static driver_t htif_console_driver = {
	"htif_console",
	htif_console_methods,
	sizeof(struct htif_console_softc)
};

static devclass_t htif_console_devclass;

DRIVER_MODULE(htif_console, htif, htif_console_driver,
    htif_console_devclass, 0, 0);

Index: head/sys/riscv/include/intr.h
===================================================================
--- head/sys/riscv/include/intr.h	(revision 295971)
+++ head/sys/riscv/include/intr.h	(revision 295972)
@@ -1,54 +1,66 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef	_MACHINE_INTR_MACHDEP_H_
#define	_MACHINE_INTR_MACHDEP_H_

struct trapframe;

void riscv_init_interrupts(void);
int riscv_teardown_intr(void *);
int riscv_config_intr(u_int, enum intr_trigger, enum intr_polarity);
int riscv_setup_intr(const char *, driver_filter_t *, driver_intr_t *,
    void *, int, int, void **);
void riscv_cpu_intr(struct trapframe *);

typedef unsigned long * riscv_intrcnt_t;

riscv_intrcnt_t riscv_intrcnt_create(const char *);
void riscv_intrcnt_setname(riscv_intrcnt_t, const char *);
+
+#ifdef SMP
+void riscv_setup_ipihandler(driver_filter_t *);
+void riscv_unmask_ipi(void);
+#endif
+
+enum {
+	IRQ_SOFTWARE,
+	IRQ_TIMER,
+	IRQ_HTIF,
+	NIRQS
+};

#endif /* !_MACHINE_INTR_MACHDEP_H_ */

Index: head/sys/riscv/include/param.h
===================================================================
--- head/sys/riscv/include/param.h	(revision 295971)
+++ head/sys/riscv/include/param.h	(revision 295972)
@@ -1,108 +1,108 @@
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)param.h	5.8 (Berkeley) 6/28/91
 * $FreeBSD$
 */

#ifndef	_MACHINE_PARAM_H_
#define	_MACHINE_PARAM_H_

/*
 * Machine dependent constants for RISC-V.
 */

#include

#define	STACKALIGNBYTES	(16 - 1)
#define	STACKALIGN(p)	((uint64_t)(p) & ~STACKALIGNBYTES)

#ifndef MACHINE
#define	MACHINE		"riscv"
#endif
#ifndef MACHINE_ARCH
#define	MACHINE_ARCH	"riscv64"
#endif

#if defined(SMP) || defined(KLD_MODULE)
#ifndef MAXCPU
-#define	MAXCPU		2
+#define	MAXCPU		16
#endif
#else
#define	MAXCPU		1
#endif /* SMP || KLD_MODULE */

#ifndef MAXMEMDOM
#define	MAXMEMDOM	1
#endif

#define	ALIGNBYTES	_ALIGNBYTES
#define	ALIGN(p)	_ALIGN(p)
/*
 * ALIGNED_POINTER is a boolean macro that checks whether an address
 * is valid to fetch data elements of type t from on this architecture.
 * This does not reflect the optimal alignment, just the possibility
 * (within reasonable limits).
 */
#define	ALIGNED_POINTER(p, t)	((((u_long)(p)) & (sizeof(t) - 1)) == 0)

/*
 * CACHE_LINE_SIZE is the compile-time maximum cache line size for an
 * architecture.  It should be used with appropriate caution.
 */
#define	CACHE_LINE_SHIFT	6
#define	CACHE_LINE_SIZE		(1 << CACHE_LINE_SHIFT)

#define	PAGE_SHIFT	12
#define	PAGE_SIZE	(1 << PAGE_SHIFT)	/* Page size */
#define	PAGE_MASK	(PAGE_SIZE - 1)

#define	MAXPAGESIZES	1	/* maximum number of supported page sizes */

#ifndef KSTACK_PAGES
#define	KSTACK_PAGES	4	/* pages of kernel stack (with pcb) */
#endif

#define	KSTACK_GUARD_PAGES	1	/* pages of kstack guard; 0 disables */
#define	PCPU_PAGES	1

/*
 * Mach derived conversion macros
 */
#define	round_page(x)	(((unsigned long)(x) + PAGE_MASK) & ~PAGE_MASK)
#define	trunc_page(x)	((unsigned long)(x) & ~PAGE_MASK)

#define	atop(x)		((unsigned long)(x) >> PAGE_SHIFT)
#define	ptoa(x)		((unsigned long)(x) << PAGE_SHIFT)

#define	riscv_btop(x)	((unsigned long)(x) >> PAGE_SHIFT)
#define	riscv_ptob(x)	((unsigned long)(x) << PAGE_SHIFT)

#define	pgtok(x)	((unsigned long)(x) * (PAGE_SIZE / 1024))

#endif /* !_MACHINE_PARAM_H_ */

Index: head/sys/riscv/include/pcpu.h
===================================================================
--- head/sys/riscv/include/pcpu.h	(revision 295971)
+++ head/sys/riscv/include/pcpu.h	(revision 295972)
@@ -1,77 +1,89 @@
/*-
 * Copyright (c) 1999 Luoqi Chen
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
+ * Portions of this software were developed by SRI International and the
+ * University of Cambridge Computer Laboratory under DARPA/AFRL contract
+ * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Portions of this software were developed by the University of Cambridge
+ * Computer Laboratory as part of the CTSRD Project, with support from the
+ * UK Higher Education Innovation Fund (HEIF).
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: FreeBSD: src/sys/i386/include/globaldata.h,v 1.27 2001/04/27
 * $FreeBSD$
 */

#ifndef	_MACHINE_PCPU_H_
#define	_MACHINE_PCPU_H_

#include
#include

#define	ALT_STACK_SIZE	128

#define	PCPU_MD_FIELDS						\
-	char __pad[129]
+	uint32_t pc_pending_ipis;	/* IPIs pending to this CPU */	\
+	uint64_t pc_sptbr;		/* L0 page table base (VA) */	\
+	uint64_t pc_reg;		/* CPU MMIO base (PA) */	\
+	char __pad[109]

#ifdef	_KERNEL

struct pcb;
struct pcpu;

extern struct pcpu *pcpup;

static inline struct pcpu *
get_pcpu(void)
{
	struct pcpu *pcpu;

	__asm __volatile("mv %0, gp" : "=&r"(pcpu));

	return (pcpu);
}

static inline struct thread *
get_curthread(void)
{
	struct thread *td;

	__asm __volatile("ld %0, 0(gp)" : "=&r"(td));

	return (td);
}

#define	curthread get_curthread()

#define	PCPU_GET(member)	(get_pcpu()->pc_ ## member)
#define	PCPU_ADD(member, value)	(get_pcpu()->pc_ ## member += (value))
#define	PCPU_INC(member)	PCPU_ADD(member, 1)
#define	PCPU_PTR(member)	(&get_pcpu()->pc_ ## member)
#define	PCPU_SET(member,value)	(get_pcpu()->pc_ ## member = (value))

#endif	/* _KERNEL */

#endif	/* !_MACHINE_PCPU_H_ */

Index: head/sys/riscv/include/riscvreg.h
===================================================================
--- head/sys/riscv/include/riscvreg.h	(revision 295971)
+++ head/sys/riscv/include/riscvreg.h	(revision 295972)
@@ -1,155 +1,164 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_RISCVREG_H_
#define	_MACHINE_RISCVREG_H_

/* Machine mode requests */
-#define	ECALL_MTIMECMP		0x1
-#define	ECALL_CLEAR_PENDING	0x2
-#define	ECALL_HTIF_CMD		0x3
-#define	ECALL_HTIF_GET_ENTRY	0x4
-#define	ECALL_MCPUID_GET	0x5
-#define	ECALL_MIMPID_GET	0x6
+#define	ECALL_MTIMECMP		0x01
+#define	ECALL_CLEAR_PENDING	0x02
+#define	ECALL_HTIF_CMD		0x03
+#define	ECALL_HTIF_GET_ENTRY	0x04
+#define	ECALL_MCPUID_GET	0x05
+#define	ECALL_MIMPID_GET	0x06
+#define	ECALL_SEND_IPI		0x07
+#define	ECALL_CLEAR_IPI		0x08
+#define	ECALL_HTIF_LOWPUTC	0x09
+#define	ECALL_MIE_SET		0x10

#define	EXCP_SHIFT			0
#define	EXCP_MASK			(0xf << EXCP_SHIFT)
#define	EXCP_INSTR_ADDR_MISALIGNED	0
#define	EXCP_INSTR_ACCESS_FAULT		1
#define	EXCP_INSTR_ILLEGAL		2
#define	EXCP_INSTR_BREAKPOINT		3
#define	EXCP_LOAD_ADDR_MISALIGNED	4
#define	EXCP_LOAD_ACCESS_FAULT		5
#define	EXCP_STORE_ADDR_MISALIGNED	6
#define	EXCP_STORE_ACCESS_FAULT		7
#define	EXCP_UMODE_ENV_CALL		8
#define	EXCP_SMODE_ENV_CALL		9
#define	EXCP_HMODE_ENV_CALL		10
#define	EXCP_MMODE_ENV_CALL		11
#define	EXCP_INTR			(1 << 31)
#define	EXCP_INTR_SOFTWARE		0
#define	EXCP_INTR_TIMER			1
#define	EXCP_INTR_HTIF			2

#define	SSTATUS_IE			(1 << 0)
#define	SSTATUS_PIE			(1 << 3)
#define	SSTATUS_PS			(1 << 4)

#define	MSTATUS_MPRV			(1 << 16)
#define	MSTATUS_PRV_SHIFT		1
#define	MSTATUS_PRV1_SHIFT		4
#define	MSTATUS_PRV2_SHIFT		7
#define	MSTATUS_PRV_MASK		(0x3 << MSTATUS_PRV_SHIFT)
#define	MSTATUS_PRV_U			0	/* user */
#define	MSTATUS_PRV_S			1	/* supervisor */
#define	MSTATUS_PRV_H			2	/* hypervisor */
#define	MSTATUS_PRV_M			3	/* machine */
#define	MSTATUS_VM_SHIFT		17
#define	MSTATUS_VM_MASK			0x1f
#define	MSTATUS_VM_MBARE		0
#define	MSTATUS_VM_MBB			1
#define	MSTATUS_VM_MBBID		2
#define	MSTATUS_VM_SV32			8
#define	MSTATUS_VM_SV39			9
#define	MSTATUS_VM_SV48			10

#define	MIE_SSIE	(1 << 1)
#define	MIE_HSIE	(1 << 2)
#define	MIE_MSIE	(1 << 3)
#define	MIE_STIE	(1 << 5)
#define	MIE_HTIE	(1 << 6)
#define	MIE_MTIE	(1 << 7)

#define	MIP_SSIP	(1 << 1)
#define	MIP_HSIP	(1 << 2)
#define	MIP_MSIP	(1 << 3)
#define	MIP_STIP	(1 << 5)
#define	MIP_HTIP	(1 << 6)
#define	MIP_MTIP	(1 << 7)

#define	SR_IE		(1 << 0)
#define	SR_IE1		(1 << 3)
#define	SR_IE2		(1 << 6)
#define	SR_IE3		(1 << 9)

#define	SIE_SSIE	(1 << 1)
#define	SIE_STIE	(1 << 5)

-/* Note: sip register is not yet implement in Spike simulator */
+/* Note: sip register has no SIP_STIP bit in Spike simulator */
+#define	SIP_SSIP	(1 << 1)
#define	SIP_STIP	(1 << 5)
+
+#define	NCSRS		4096
+#define	CSR_IPI		0x783
+#define	XLEN		8

#define	CSR_ZIMM(val)							\
	(__builtin_constant_p(val) && ((u_long)(val) < 32))

#define	csr_swap(csr, val)						\
({	if (CSR_ZIMM(val))						\
		__asm __volatile("csrrwi %0, " #csr ", %1"		\
				: "=r" (val) : "i" (val));		\
	else								\
		__asm __volatile("csrrw %0, " #csr ", %1"		\
				: "=r" (val) : "r" (val));		\
	val;								\
})

#define	csr_write(csr, val)						\
({	if (CSR_ZIMM(val))						\
		__asm __volatile("csrwi " #csr ", %0" :: "i" (val));	\
	else								\
		__asm __volatile("csrw " #csr ", %0" :: "r" (val));	\
})

#define	csr_set(csr, val)						\
({	if (CSR_ZIMM(val))						\
		__asm __volatile("csrsi " #csr ", %0" :: "i" (val));	\
	else								\
		__asm __volatile("csrs " #csr ", %0" :: "r" (val));	\
})

#define	csr_clear(csr, val)						\
({	if (CSR_ZIMM(val))						\
		__asm __volatile("csrci " #csr ", %0" :: "i" (val));	\
	else								\
		__asm __volatile("csrc " #csr ", %0" :: "r" (val));	\
})

#define	csr_read(csr)							\
({	u_long val;							\
	__asm __volatile("csrr %0, " #csr : "=r" (val));		\
	val;								\
})

#endif /* !_MACHINE_RISCVREG_H_ */

Index: head/sys/riscv/include/smp.h
===================================================================
--- head/sys/riscv/include/smp.h	(revision 295971)
+++ head/sys/riscv/include/smp.h	(revision 295972)
@@ -1 +1,55 @@
-/* $FreeBSD$ */
+/*-
+ * Copyright (c) 2016 Ruslan Bukin
+ * All rights reserved.
+ *
+ * Portions of this software were developed by SRI International and the
+ * University of Cambridge Computer Laboratory under DARPA/AFRL contract
+ * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Portions of this software were developed by the University of Cambridge
+ * Computer Laboratory as part of the CTSRD Project, with support from the
+ * UK Higher Education Innovation Fund (HEIF).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_SMP_H_
+#define _MACHINE_SMP_H_
+
+#include
+
+#define	IPI_AST		(1 << 0)
+#define	IPI_PREEMPT	(1 << 1)
+#define	IPI_RENDEZVOUS	(1 << 2)
+#define	IPI_STOP	(1 << 3)
+#define	IPI_STOP_HARD	(1 << 4)
+#define	IPI_HARDCLOCK	(1 << 5)
+
+void ipi_all_but_self(u_int ipi);
+void ipi_cpu(int cpu, u_int ipi);
+void ipi_selected(cpuset_t cpus, u_int ipi);
+
+extern struct pcb stoppcbs[];
+
+#endif /* !_MACHINE_SMP_H_ */

Index: head/sys/riscv/riscv/cpufunc_asm.S
===================================================================
--- head/sys/riscv/riscv/cpufunc_asm.S	(revision 295971)
+++ head/sys/riscv/riscv/cpufunc_asm.S	(revision 295972)
@@ -1,101 +1,102 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
#include
__FBSDID("$FreeBSD$");

	.text
	.align	2

.Lpage_mask:
	.word	PAGE_MASK

ENTRY(riscv_nullop)
	ret
END(riscv_nullop)

/*
 * Generic functions to read/modify/write the internal coprocessor registers
 */

ENTRY(riscv_tlb_flushID)
	sfence.vm
	ret
END(riscv_tlb_flushID)

ENTRY(riscv_tlb_flushID_SE)
	sfence.vm
	ret
END(riscv_tlb_flushID_SE)

/*
 * void riscv_dcache_wb_range(vm_offset_t, vm_size_t)
 */
ENTRY(riscv_dcache_wb_range)
-	/* RISCVTODO */
+	sfence.vm
	ret
END(riscv_dcache_wb_range)

/*
 * void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t)
 */
ENTRY(riscv_dcache_wbinv_range)
-	/* RISCVTODO */
+	sfence.vm
	ret
END(riscv_dcache_wbinv_range)

/*
 * void riscv_dcache_inv_range(vm_offset_t, vm_size_t)
 */
ENTRY(riscv_dcache_inv_range)
-	/* RISCVTODO */
+	sfence.vm
	ret
END(riscv_dcache_inv_range)

/*
 * void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t)
 */
ENTRY(riscv_idcache_wbinv_range)
-	/* RISCVTODO */
+	fence.i
+	sfence.vm
	ret
END(riscv_idcache_wbinv_range)

/*
 * void riscv_icache_sync_range(vm_offset_t, vm_size_t)
 */
ENTRY(riscv_icache_sync_range)
-	/* RISCVTODO */
+	fence.i
	ret
END(riscv_icache_sync_range)

Index: head/sys/riscv/riscv/exception.S
===================================================================
--- head/sys/riscv/riscv/exception.S	(revision 295971)
+++ head/sys/riscv/riscv/exception.S	(revision 295972)
@@ -1,461 +1,594 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include "assym.s"

#include
#include

.macro save_registers el
	addi	sp, sp, -(TF_SIZE)

	sd	ra, (TF_RA)(sp)
	sd	tp, (TF_TP)(sp)

.if \el == 0	/* We came from userspace. Load our pcpu */
	sd	gp, (TF_GP)(sp)
	ld	gp, (TF_SIZE)(sp)
.endif

	sd	t0, (TF_T + 0 * 8)(sp)
	sd	t1, (TF_T + 1 * 8)(sp)
	sd	t2, (TF_T + 2 * 8)(sp)
	sd	t3, (TF_T + 3 * 8)(sp)
	sd	t4, (TF_T + 4 * 8)(sp)
	sd	t5, (TF_T + 5 * 8)(sp)
	sd	t6, (TF_T + 6 * 8)(sp)

	sd	s0, (TF_S + 0 * 8)(sp)
	sd	s1, (TF_S + 1 * 8)(sp)
	sd	s2, (TF_S + 2 * 8)(sp)
	sd	s3, (TF_S + 3 * 8)(sp)
	sd	s4, (TF_S + 4 * 8)(sp)
	sd	s5, (TF_S + 5 * 8)(sp)
	sd	s6, (TF_S + 6 * 8)(sp)
	sd	s7, (TF_S + 7 * 8)(sp)
	sd	s8, (TF_S + 8 * 8)(sp)
	sd	s9, (TF_S + 9 * 8)(sp)
	sd	s10, (TF_S + 10 * 8)(sp)
	sd	s11, (TF_S + 11 * 8)(sp)

	sd	a0, (TF_A + 0 * 8)(sp)
	sd	a1, (TF_A + 1 * 8)(sp)
	sd	a2, (TF_A + 2 * 8)(sp)
	sd	a3, (TF_A + 3 * 8)(sp)
	sd	a4, (TF_A + 4 * 8)(sp)
	sd	a5, (TF_A + 5 * 8)(sp)
	sd	a6, (TF_A + 6 * 8)(sp)
	sd	a7, (TF_A + 7 * 8)(sp)

#if 0
	/* XXX: temporary test: spin if stack is not kernel one */
.if \el == 1	/* kernel */
	mv	t0, sp
	srli	t0, t0, 63
1:
	beqz	t0, 1b
.endif
#endif

.if \el == 1
	/* Store kernel sp */
	sd	sp, (TF_SP)(sp)
.else
	/* Store user sp */
	csrr	t0, sscratch
	sd	t0, (TF_SP)(sp)
.endif
	li	t0, 0
	csrw	sscratch, t0
	csrr	t0, sepc
	sd	t0, (TF_SEPC)(sp)
	csrr	t0, sstatus
	sd	t0, (TF_SSTATUS)(sp)
	csrr	t0, sbadaddr
	sd	t0, (TF_SBADADDR)(sp)
	csrr	t0, scause
	sd	t0, (TF_SCAUSE)(sp)
.endm

.macro load_registers el
	ld	t0, (TF_SSTATUS)(sp)
.if \el == 0
	/* Ensure user interrupts will be enabled on eret. */
	ori	t0, t0, SSTATUS_PIE
.else
	/*
	 * Disable interrupts for supervisor mode exceptions.
	 * For user mode exceptions we have already done this
	 * in do_ast.
	 */
	li	t1, ~SSTATUS_IE
	and	t0, t0, t1
.endif
	csrw	sstatus, t0

	ld	t0, (TF_SEPC)(sp)
	csrw	sepc, t0

.if \el == 0
	/* We go to userspace. Load user sp */
	ld	t0, (TF_SP)(sp)
	csrw	sscratch, t0

	/* And store our pcpu */
	sd	gp, (TF_SIZE)(sp)
	ld	gp, (TF_GP)(sp)
.endif

	ld	ra, (TF_RA)(sp)
	ld	tp, (TF_TP)(sp)

	ld	t0, (TF_T + 0 * 8)(sp)
	ld	t1, (TF_T + 1 * 8)(sp)
	ld	t2, (TF_T + 2 * 8)(sp)
	ld	t3, (TF_T + 3 * 8)(sp)
	ld	t4, (TF_T + 4 * 8)(sp)
	ld	t5, (TF_T + 5 * 8)(sp)
	ld	t6, (TF_T + 6 * 8)(sp)

	ld	s0, (TF_S + 0 * 8)(sp)
	ld	s1, (TF_S + 1 * 8)(sp)
	ld	s2, (TF_S + 2 * 8)(sp)
	ld	s3, (TF_S + 3 * 8)(sp)
	ld	s4, (TF_S + 4 * 8)(sp)
	ld	s5, (TF_S + 5 * 8)(sp)
	ld	s6, (TF_S + 6 * 8)(sp)
	ld	s7, (TF_S + 7 * 8)(sp)
	ld	s8, (TF_S + 8 * 8)(sp)
	ld	s9, (TF_S + 9 * 8)(sp)
	ld	s10, (TF_S + 10 * 8)(sp)
	ld	s11, (TF_S + 11 * 8)(sp)

	ld	a0, (TF_A + 0 * 8)(sp)
	ld	a1, (TF_A + 1 * 8)(sp)
	ld	a2, (TF_A + 2 * 8)(sp)
	ld	a3, (TF_A + 3 * 8)(sp)
	ld	a4, (TF_A + 4 * 8)(sp)
	ld	a5, (TF_A + 5 * 8)(sp)
	ld	a6, (TF_A + 6 * 8)(sp)
	ld	a7, (TF_A + 7 * 8)(sp)

	addi	sp, sp, (TF_SIZE)
.endm

.macro do_ast
	/* Disable interrupts */
	csrr	a4, sstatus
1:
	csrci	sstatus, SSTATUS_IE

	ld	a1, PC_CURTHREAD(gp)
	lw	a2, TD_FLAGS(a1)
	li	a3, (TDF_ASTPENDING|TDF_NEEDRESCHED)
	and	a2, a2, a3
	beqz	a2, 2f

	/* Restore interrupts */
	andi	a4, a4, SSTATUS_IE
	csrs	sstatus, a4

	/* Handle the ast */
	mv	a0, sp
	call	_C_LABEL(ast)

	/* Re-check for new ast scheduled */
	j	1b
2:
.endm

ENTRY(cpu_exception_handler_supervisor)
	save_registers 1
	mv	a0, sp
	call	_C_LABEL(do_trap_supervisor)
	load_registers 1
	eret
END(cpu_exception_handler_supervisor)

ENTRY(cpu_exception_handler_user)
	csrrw	sp, sscratch, sp
	save_registers 0
	mv	a0, sp
	call	_C_LABEL(do_trap_user)
	do_ast
	load_registers 0
	csrrw	sp, sscratch, sp
	eret
END(cpu_exception_handler_user)

/*
 * Trap handlers
 */
	.text
bad_trap:
	j bad_trap

user_trap:
+	/* Save state */
	csrrw	sp, mscratch, sp
	addi	sp, sp, -64
	sd	t0, (8 * 0)(sp)
	sd	t1, (8 * 1)(sp)
	sd	t2, (8 * 2)(sp)
	sd	t3, (8 * 3)(sp)
	sd	t4, (8 * 4)(sp)
	sd	t5, (8 * 5)(sp)
	sd	a0, (8 * 7)(sp)

	la	t2, _C_LABEL(cpu_exception_handler_user)

	csrr	t0, mcause
	bltz	t0, machine_interrupt
	j	exit_mrts

supervisor_trap:
	/* Save state */
	csrrw	sp, mscratch, sp
	addi	sp, sp, -64
	sd	t0, (8 * 0)(sp)
	sd	t1, (8 * 1)(sp)
	sd	t2, (8 * 2)(sp)
	sd	t3, (8 * 3)(sp)
	sd	t4, (8 * 4)(sp)
	sd	t5, (8 * 5)(sp)
	sd	a0, (8 * 7)(sp)

	la	t2, _C_LABEL(cpu_exception_handler_supervisor)

	csrr	t0, mcause
	bltz	t0, machine_interrupt

	li	t1, EXCP_SMODE_ENV_CALL
	beq	t0, t1, supervisor_call
	j	exit_mrts

machine_interrupt:
	/* Type of interrupt ? */
	csrr	t0, mcause
	andi	t0, t0, 3

	li	t1, 0
	beq	t1, t0, software_interrupt

	li	t1, 1
	beq	t1, t0, timer_interrupt

	li	t1, 2
	beq	t1, t0, htif_interrupt

	/* not reached */
1:
	j	1b

software_interrupt:
-	/* Redirect to supervisor */
+	li	t0, MIP_MSIP
+	csrc	mip, t0
+	li	t0, MIP_SSIP
+	csrs	mip, t0
+
+	/* If PRV1 is PRV_U (user) then serve the trap */
+	csrr	t0, mstatus
+	li	t1, (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT)
+	and	t0, t0, t1
+	beqz	t0, 1f
+
+	/*
+	 * If PRV1 is supervisor and interrupts were enabled,
+	 * then serve the trap.
+	 */
+	csrr	t0, mstatus
+	li	t1, (SR_IE1 | (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT))
+	and	t0, t0, t1
+	li	t1, (SR_IE1 | (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT))
+	beq	t0, t1, 1f
+
+	j	exit
+
+1:
+	/* Serve a trap in supervisor mode */
	j	exit_mrts

timer_interrupt:
	/* Disable machine timer interrupts */
	li	t0, MIE_MTIE
	csrc	mie, t0

	/* Clear machine pending */
	li	t0, MIP_MTIP
	csrc	mip, t0

-	/* Post supervisor software interrupt */
+	/* Post supervisor timer interrupt */
	li	t0, MIP_STIP
	csrs	mip, t0

-	/* If PRV1 is PRV_U (user) then serve a trap */
+	/* If PRV1 is PRV_U (user) then serve the trap */
	csrr	t0, mstatus
	li	t1, (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT)
	and	t0, t0, t1
	beqz	t0, 1f

-	/* If PRV1 is supervisor and interrupts were enabled, then serve a trap */
+	/*
+	 * If PRV1 is supervisor and interrupts were enabled,
+	 * then serve the trap.
+	 */
	csrr	t0, mstatus
	li	t1, (SR_IE1 | (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT))
	and	t0, t0, t1
	li	t1, (SR_IE1 | (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT))
	beq	t0, t1, 1f

	j	exit

1:
	/* Serve a trap in supervisor mode */
	j	exit_mrts

htif_interrupt:
1:
	li	t5, 0
	csrrw	t5, mfromhost, t5
	beqz	t5, 3f

	/* Console PUT intr ? */
	mv	t1, t5
	li	t0, 0x101
	srli	t1, t1, 48
	bne	t1, t0, 2f

	/* Yes */
	la	t0, console_intr
	li	t1, 1
	sd	t1, 0(t0)
-	j	3f
+	/* Check if there is any other pending event */
+	j	1b
+
2:
	/* Save entry */
-	la	t0, htif_ring_cursor
-	beqz	t0, 3f		/* not initialized */
-	ld	t0, 0(t0)	/* load struct */
-	sd	t5, 0(t0)	/* put entry */
+	la	t0, htif_ring
+	csrr	t1, mhartid
+	li	t4, (HTIF_RING_SIZE + 16)
+	mulw	t4, t4, t1
+	add	t0, t0, t4
+	li	t4, (HTIF_RING_SIZE)
+	add	t0, t0, t4	/* t0 == htif_ring_cursor */
+
+	ld	t1, 0(t0)	/* load ptr to cursor */
+	sd	t5, 0(t1)	/* put entry */
	li	t4, 1
-	sd	t4, 8(t0)	/* mark used */
-	ld	t4, 16(t0)	/* take next */
+	sd	t4, 8(t1)	/* mark used */
+	ld	t4, 16(t1)	/* take next */

	/* Update cursor */
-	la	t0, htif_ring_cursor
	sd	t4, 0(t0)

	/* Post supervisor software interrupt */
	li	t0, MIP_SSIP
	csrs	mip, t0

+	/* Check if there is any other pending event */
+	j	1b
+
3:
	j	exit

supervisor_call:
	csrr	t1, mepc
	addi	t1, t1, 4	/* Next instruction in t1 */
	li	t4, ECALL_HTIF_CMD
	beq	t5, t4, htif_cmd
	li	t4, ECALL_HTIF_GET_ENTRY
	beq	t5, t4, htif_get_entry
	li	t4, ECALL_MTIMECMP
	beq	t5, t4, set_mtimecmp
	li	t4, ECALL_CLEAR_PENDING
	beq	t5, t4, clear_pending
	li	t4, ECALL_MCPUID_GET
	beq	t5, t4, mcpuid_get
	li	t4, ECALL_MIMPID_GET
	beq	t5, t4, mimpid_get
+	li	t4, ECALL_SEND_IPI
+	beq	t5, t4, send_ipi
+	li	t4, ECALL_CLEAR_IPI
+	beq	t5, t4, clear_ipi
+	li	t4, ECALL_HTIF_LOWPUTC
+	beq	t5, t4, htif_lowputc
+	li	t4, ECALL_MIE_SET
+	beq	t5, t4, mie_set
	j	exit_next_instr

+mie_set:
+	csrs	mie, t6
+	j	exit_next_instr
+
mcpuid_get:
	csrr	t6, mcpuid
	j	exit_next_instr

mimpid_get:
	csrr	t6, mimpid
	j	exit_next_instr

+send_ipi:
+	/* CPU mmio base in t6 */
+	mv	t0, t6
+	li	t2, (CSR_IPI * XLEN)
+	add	t0, t0, t2	/* t0 = CSR_IPI */
+	li	t2, 1
+	sd	t2, 0(t0)
+	j	exit_next_instr
+
+clear_ipi:
+	/* Do only clear if there are no new entries in HTIF ring */
+	la	t0, htif_ring
+	csrr	t2, mhartid
+	li	t4, (HTIF_RING_SIZE + 16)
+	mulw	t4, t4, t2
+	add	t0, t0, t4
+	li	t4, (HTIF_RING_SIZE)
+	add	t0, t0, t4	/* t0 == ptr to htif_ring_cursor */
+	ld	t2, 8(t0)	/* load htif_ring_last */
+	ld	t2, 8(t2)	/* load used */
+	bnez	t2, 1f
+
+	/* Clear supervisor software interrupt pending bit */
+	li	t0, MIP_SSIP
+	csrc	mip, t0
+
+1:
+	j	exit_next_instr
+
htif_get_entry:
+	/* Get a htif_ring for current core */
+	la	t0, htif_ring
+	csrr	t2, mhartid
+	li	t4, (HTIF_RING_SIZE + 16)
+	mulw	t4, t4, t2
+	add	t0, t0, t4
+	li	t4, (HTIF_RING_SIZE + 8)
+	add	t0, t0, t4	/* t0 == htif_ring_last */
+
+	/* Check for new entries */
	li	t6, 0		/* preset return value */
-	la	t0, htif_ring_last
-	ld	t0, 0(t0)	/* load struct */
-	ld	t4, 8(t0)	/* get used */
-	beqz	t4, 1f
-	ld	t6, 0(t0)	/* get entry */
+	ld	t2, 0(t0)	/* load ptr to last */
+	ld	t4, 8(t2)	/* get used */
+	beqz	t4, 1f		/* No new entries. Exit */
+
+	/* Get one */
+	ld	t6, 0(t2)	/* get entry */
	li	t4, 0
-	sd	t4, 8(t0)	/* mark free */
-	sd	t4, 0(t0)	/* free entry, just in case */
-	ld	t4, 16(t0)	/* take next */
-	la	t0, htif_ring_last
-	sd	t4, 0(t0)
+	sd	t4, 8(t2)	/* mark free */
+	sd	t4, 0(t2)	/* free entry, just in case */
+	ld	t4, 16(t2)	/* take next */
+	sd	t4, 0(t0)	/* update ptr to last */
1:
	/* Exit. Result is stored in t6 */
	j	exit_next_instr

htif_cmd:
+1:
	mv	t0, t6
+	csrrw	t0, mtohost, t0
+	bnez	t0, 1b
+	j	exit_next_instr
+
+htif_lowputc:
1:
+	mv	t0, t6
	csrrw	t0, mtohost, t0
	bnez	t0, 1b
+
+2:
+	li	t4, 0
+	csrrw	t5, mfromhost, t4
+	beqz	t5, 2b
+
+	/* Console PUT intr ? */
+	mv	t2, t5
+	srli	t2, t2, 48
+	li	t3, 0x0101
+	beq	t2, t3, 3f
+
+	/* Not a console PUT, so save entry */
+	la	t0, htif_ring
+	csrr	t2, mhartid
+	li	t4, (HTIF_RING_SIZE + 16)
+	mulw	t4, t4, t2
+	add	t0, t0, t4
+	li	t4, (HTIF_RING_SIZE)
+	add	t0, t0, t4	/* t0 == htif_ring_cursor */
+
+	ld	t2, 0(t0)	/* load ptr to cursor */
+	sd	t5, 0(t2)	/* put entry */
+	li	t4, 1
+	sd	t4, 8(t2)	/* mark used */
+	ld	t4, 16(t2)	/* take next */
+
+	/* Update cursor */
+	sd	t4, 0(t0)
+
+	/* Post supervisor software interrupt */
+	li	t0, MIP_SSIP
+	csrs	mip, t0
+
+	/* Wait for console intr again */
+	j	2b
+
+3:
	j	exit_next_instr

set_mtimecmp:
	csrr	t2, stime
	add	t6, t6, t2
	csrw	mtimecmp, t6

	/* Enable interrupts */
	li	t0, (MIE_MTIE | MIE_STIE)
	csrs	mie, t0
	j	exit_next_instr

clear_pending:
	li	t0, MIP_STIP
	csrc	mip, t0
	j	exit_next_instr

/*
 * Trap exit functions
 */
exit_next_instr:
	/* Next instruction is in t1 */
	csrw	mepc, t1
exit:
	/* Restore state */
	ld	t0, (8 * 0)(sp)
	ld	t1, (8 * 1)(sp)
	ld	t2, (8 * 2)(sp)
	ld	t3, (8 * 3)(sp)
	ld	t4, (8 * 4)(sp)
	ld	t5, (8 * 5)(sp)
	ld	a0, (8 * 7)(sp)
	addi	sp, sp, 64
	csrrw	sp, mscratch, sp
	eret

/*
 * Redirect to supervisor
 */
exit_mrts:
	/* Setup exception handler */
	li	t1, KERNBASE
	add	t2, t2, t1
	csrw	stvec, t2

	/* Restore state */
	ld	t0, (8 * 0)(sp)
	ld	t1, (8 * 1)(sp)
	ld	t2, (8 * 2)(sp)
	ld	t3, (8 * 3)(sp)
	ld	t4, (8 * 4)(sp)
	ld	t5, (8 * 5)(sp)
	ld	a0, (8 * 7)(sp)
	addi	sp, sp, 64
	csrrw	sp, mscratch, sp

	/* Redirect to supervisor */
	mrts

Index: head/sys/riscv/riscv/genassym.c
===================================================================
--- head/sys/riscv/riscv/genassym.c	(revision 295971)
+++ head/sys/riscv/riscv/genassym.c	(revision 295972)
@@ -1,98 +1,99 @@
/*-
- * Copyright (c) 2015 Ruslan Bukin
+ * Copyright (c) 2015-2016 Ruslan Bukin
 * All rights reserved.
 *
 * Portions of this software were developed by SRI International and the
 * University of Cambridge Computer Laboratory under DARPA/AFRL contract
 * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme.
 *
 * Portions of this software were developed by the University of Cambridge
 * Computer Laboratory as part of the CTSRD Project, with support from the
 * UK Higher Education Innovation Fund (HEIF).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp)); ASSYM(PCB_TP, offsetof(struct pcb, pcb_tp)); ASSYM(PCB_T, offsetof(struct pcb, pcb_t)); ASSYM(PCB_S, offsetof(struct pcb, pcb_s)); ASSYM(PCB_A, offsetof(struct pcb, pcb_a)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); +ASSYM(PC_SPTBR, offsetof(struct pcpu, pc_sptbr)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(TF_RA, offsetof(struct trapframe, tf_ra)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); ASSYM(TF_GP, offsetof(struct trapframe, tf_gp)); ASSYM(TF_TP, offsetof(struct trapframe, tf_tp)); ASSYM(TF_T, offsetof(struct trapframe, tf_t)); ASSYM(TF_S, offsetof(struct trapframe, tf_s)); ASSYM(TF_A, offsetof(struct trapframe, tf_a)); ASSYM(TF_SEPC, offsetof(struct trapframe, tf_sepc)); ASSYM(TF_SBADADDR, offsetof(struct trapframe, tf_sbadaddr)); ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); Index: head/sys/riscv/riscv/intr_machdep.c =================================================================== --- head/sys/riscv/riscv/intr_machdep.c (revision 295971) +++ head/sys/riscv/riscv/intr_machdep.c (revision 295972) @@ -1,223 +1,297 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. 
* * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include #include #include #include #include #include -enum { - IRQ_SOFTWARE, - IRQ_TIMER, - IRQ_HTIF, - NIRQS -}; +#ifdef SMP +#include +#endif u_long intrcnt[NIRQS]; size_t sintrcnt = sizeof(intrcnt); char intrnames[NIRQS * (MAXCOMLEN + 1) * 2]; size_t sintrnames = sizeof(intrnames); static struct intr_event *intr_events[NIRQS]; static riscv_intrcnt_t riscv_intr_counters[NIRQS]; static int intrcnt_index; riscv_intrcnt_t riscv_intrcnt_create(const char* name) { riscv_intrcnt_t counter; counter = &intrcnt[intrcnt_index++]; riscv_intrcnt_setname(counter, name); return (counter); } void riscv_intrcnt_setname(riscv_intrcnt_t counter, const char *name) { int i; i = (counter - intrcnt); KASSERT(counter != NULL, ("riscv_intrcnt_setname: NULL counter")); snprintf(intrnames + (MAXCOMLEN + 1) * i, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void riscv_mask_irq(void *source) { uintptr_t irq; irq = (uintptr_t)source; switch (irq) { case IRQ_TIMER: csr_clear(sie, SIE_STIE); break; case IRQ_SOFTWARE: csr_clear(sie, SIE_SSIE); break; default: panic("Unknown irq %d\n", irq); } } static void riscv_unmask_irq(void *source) { uintptr_t irq; irq = (uintptr_t)source; switch (irq) { case IRQ_TIMER: csr_set(sie, SIE_STIE); break; case IRQ_SOFTWARE: csr_set(sie, SIE_SSIE); break; default: panic("Unknown irq %d\n", irq); } } void riscv_init_interrupts(void) { char name[MAXCOMLEN + 1]; int i; for (i = 0; i < NIRQS; i++) { snprintf(name, MAXCOMLEN + 1, "int%d:", i); riscv_intr_counters[i] = riscv_intrcnt_create(name); } } int riscv_setup_intr(const char *name, driver_filter_t *filt, void (*handler)(void*), void *arg, int irq, int flags, void **cookiep) { struct intr_event *event; int error; if (irq < 0 || irq >= NIRQS) panic("%s: unknown intr %d", __func__, irq); event = intr_events[irq]; if (event == NULL) { error = intr_event_create(&event, (void *)(uintptr_t)irq, 0, irq, riscv_mask_irq, riscv_unmask_irq, NULL, NULL, "int%d", irq); if (error) return (error); intr_events[irq] = event; 
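/* First handler on this IRQ: unmask the source now that the event exists. */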
riscv_unmask_irq((void*)(uintptr_t)irq); } - intr_event_add_handler(event, name, filt, handler, arg, + error = intr_event_add_handler(event, name, filt, handler, arg, intr_priority(flags), flags, cookiep); + if (error) { + printf("Failed to setup intr: %d\n", irq); + return (error); + } riscv_intrcnt_setname(riscv_intr_counters[irq], event->ie_fullname); return (0); } int riscv_teardown_intr(void *ih) { /* TODO */ return (0); } int riscv_config_intr(u_int irq, enum intr_trigger trig, enum intr_polarity pol) { /* There is no configuration for interrupts */ return (0); } void riscv_cpu_intr(struct trapframe *frame) { struct intr_event *event; int active_irq; critical_enter(); KASSERT(frame->tf_scause & EXCP_INTR, ("riscv_cpu_intr: wrong frame passed")); active_irq = (frame->tf_scause & EXCP_MASK); switch (active_irq) { case IRQ_SOFTWARE: case IRQ_TIMER: event = intr_events[active_irq]; /* Update counters */ atomic_add_long(riscv_intr_counters[active_irq], 1); PCPU_INC(cnt.v_intr); break; case IRQ_HTIF: /* HTIF interrupts are only handled in machine mode */ panic("%s: HTIF interrupt", __func__); break; default: event = NULL; } if (!event || TAILQ_EMPTY(&event->ie_handlers) || (intr_event_handle(event, frame) != 0)) printf("stray interrupt %d\n", active_irq); critical_exit(); } + +#ifdef SMP +void +riscv_setup_ipihandler(driver_filter_t *filt) +{ + + riscv_setup_intr("ipi", filt, NULL, NULL, IRQ_SOFTWARE, + INTR_TYPE_MISC, NULL); +} + +void +riscv_unmask_ipi(void) +{ + + csr_set(sie, SIE_SSIE); +} + +/* Sending IPI */ +static void +ipi_send(struct pcpu *pc, int ipi) +{ + + CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi); + + atomic_set_32(&pc->pc_pending_ipis, ipi); + machine_command(ECALL_SEND_IPI, pc->pc_reg); + + CTR1(KTR_SMP, "%s: sent", __func__); +} + +void +ipi_all_but_self(u_int ipi) +{ + cpuset_t other_cpus; + + other_cpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &other_cpus); + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + ipi_selected(other_cpus, ipi); +} + +void +ipi_cpu(int cpu, u_int ipi) +{ + cpuset_t cpus; + + CPU_ZERO(&cpus); + CPU_SET(cpu, &cpus); + + CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x\n", __func__, cpu, ipi); + ipi_send(cpuid_to_pcpu[cpu], ipi); +} + +void +ipi_selected(cpuset_t cpus, u_int ipi) +{ + struct pcpu *pc; + + CTR1(KTR_SMP, "ipi_selected: ipi: %x", ipi); + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (CPU_ISSET(pc->pc_cpuid, &cpus)) { + CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, + ipi); + ipi_send(pc, ipi); + } + } +} + +#endif Index: head/sys/riscv/riscv/locore.S =================================================================== --- head/sys/riscv/riscv/locore.S (revision 295971) +++ head/sys/riscv/riscv/locore.S (revision 295972) @@ -1,273 +1,399 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.s" #include #include #include #include #include #include -#define HTIF_RING_SIZE (64) -#define HTIF_RING_LAST (24 * (HTIF_RING_SIZE - 1)) +#define HTIF_RING_NENTRIES (512) +#define HTIF_RING_ENTRY_SZ (24) +#define HTIF_RING_SIZE (HTIF_RING_ENTRY_SZ * HTIF_RING_NENTRIES) +#define HW_STACK_SIZE (96) +/* + * Event queue for each CPU core: + * + * struct htif_ring { + * uint64_t data; + * uint64_t used; + * uint64_t next; + * } htif_ring[HTIF_RING_NENTRIES]; + * uint64_t htif_ring_cursor; + * uint64_t htif_ring_last; + */ + +.macro build_ring + la t0, htif_ring +#ifdef SMP + csrr a0, mhartid + li s0, (HTIF_RING_SIZE + 16) + mulw s0, a0, s0 + add t0, t0, s0 +#endif + li t1, 0 + sd t1, 0(t0) /* zero data */ + sd t1, 8(t0) /* zero used */ + mv t2, t0 + mv t3, t0 + li t5, (HTIF_RING_SIZE - HTIF_RING_ENTRY_SZ) + li t6, 0 + add t4, t0, t5 +1: + addi t3, t3, 24 /* pointer to next */ + beq t3, t4, 2f /* finish */ + sd t3, 16(t2) /* store pointer */ + addi t2, t2, 24 /* next entry */ + addi t6, t6, 1 /* counter */ + j 1b +2: + sd t0, 16(t3) /* last -> first */ + + li t2, (HTIF_RING_SIZE) + add s0, t0, t2 + sd t0, 0(s0) /* cursor */ + sd t0, 8(s0) /* last */ + /* finish building ring */ +.endm + .globl kernbase .set kernbase, KERNBASE /* Trap entries */ .text mentry: /* User mode entry point (mtvec + 0x000) */ .align 6 j user_trap /* Supervisor mode entry point (mtvec + 0x040) */ .align 6 j supervisor_trap /* Hypervisor mode entry point (mtvec + 0x080) */ .align 6 j bad_trap /* Machine mode entry point (mtvec + 0x0C0) */ .align 6 j bad_trap /* Reset vector */ .text .align 8 .globl _start _start: - li s11, KERNBASE + /* Direct secondary cores to mpentry */ + csrr a0, mhartid + bnez a0, mpentry - /* Build ring */ - la t0, htif_ring - li t1, 0 - sd t1, 0(t0) /* zero data */ - sd t1, 8(t0) /* zero used */ - mv t2, t0 - mv t3, t0 - li t5, HTIF_RING_LAST - li t6, 0 - add t4, t0, t5 -1: - addi t3, t3, 24 /* pointer to next */ - beq t3, t4, 2f /* finish */ - sd t3, 16(t2) /* store pointer */ - addi t2, t2, 24 /* next entry */ - addi t6, t6, 1 /* counter */ - j 1b -2: - sd t0, 16(t3) /* last -> first */ - la t1, htif_ring_cursor - sd t0, 0(t1) - la t1, htif_ring_last - sd t0, 0(t1) - /* finish building ring */ + /* Build event queue for current core */ + build_ring + /* Setup machine-mode stack for CPU 0 */ la t0, hardstack_end csrw mscratch, t0 la t0, mentry csrw mtvec, t0 li t0, 0 csrw sscratch, t0 
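	/*
	 * For reference: the ring that build_ring lays out is driven by
	 * two small operations. A C sketch, using the field names from
	 * the struct htif_ring comment above (the real code is the
	 * htif_interrupt and ECALL_HTIF_GET_ENTRY handlers in
	 * exception.S):
	 *
	 *	push(ev): e = cursor; e->data = ev; e->used = 1;
	 *	          cursor = e->next;
	 *	pop():    e = last; if (e->used == 0) return (0);
	 *	          ev = e->data; e->used = 0; e->data = 0;
	 *	          last = e->next; return (ev);
	 */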
li s10, PAGE_SIZE li s9, (PAGE_SIZE * KSTACK_PAGES) /* Page tables */ /* Level 0 */ la s1, pagetable_l0 la s2, pagetable_l1 /* Link to next level PN */ srli s2, s2, PAGE_SHIFT li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level0 PTE entry to position */ li a5, 0x1ff li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 + /* Store it to pagetable_l0 for each cpu */ + li t1, MAXCPU + li t2, PAGE_SIZE +1: sd t6, 0(t0) + add t0, t0, t2 + addi t1, t1, -1 + bnez t1, 1b /* Level 1 */ la s1, pagetable_l1 la s2, pagetable_l2 /* Link to next level PN */ srli s2, s2, PAGE_SHIFT li a5, KERNBASE srli a5, a5, 0x1e /* >> 30 */ andi a5, a5, 0x1ff /* & 0x1ff */ li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Level 2 superpages (512 x 2MiB) */ la s1, pagetable_l2 li t3, 512 /* Build 512 entries */ li t4, 0 /* Counter */ li t5, 0 2: li t0, (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)) slli t2, t4, PTE_PPN1_S /* << PTE_PPN1_S */ or t5, t0, t2 sd t5, (s1) /* Store PTE entry to position */ addi s1, s1, PTE_SIZE addi t4, t4, 1 bltu t4, t3, 2b /* Set page tables base register */ la s1, pagetable_l0 csrw sptbr, s1 /* Page tables END */ /* Enter supervisor mode */ li s0, ((MSTATUS_VM_SV48 << MSTATUS_VM_SHIFT) | \ (MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \ (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \ (MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT)); csrw mstatus, s0 + /* + * Enable machine-mode software interrupts + * so we can deliver IPI to this core. + */ + li t0, MIE_MSIE + csrs mie, t0 + /* Exit from machine mode */ la t0, .Lmmu_on + li s11, KERNBASE add t0, t0, s11 csrw mepc, t0 eret .Lmmu_on: /* Initialize stack pointer */ la s3, initstack_end mv sp, s3 addi sp, sp, -PCB_SIZE /* Clear BSS */ la a0, _C_LABEL(__bss_start) la s1, _C_LABEL(_end) 1: sd zero, 0(a0) addi a0, a0, 8 bltu a0, s1, 1b /* Fill riscv_bootparams */ addi sp, sp, -16 la t0, pagetable_l1 sd t0, 0(sp) /* kern_l1pt */ la t0, initstack_end sd t0, 8(sp) /* kern_stack */ mv a0, sp call _C_LABEL(initriscv) /* Off we go */ call _C_LABEL(mi_startup) .align 4 initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: hardstack: - .space (PAGE_SIZE) + .space (HW_STACK_SIZE * MAXCPU) hardstack_end: .globl htif_ring htif_ring: - .space (24 * 1024) + .space ((HTIF_RING_SIZE + 16) * MAXCPU) - .globl htif_ring_cursor -htif_ring_cursor: - .space (8) - - .globl htif_ring_last -htif_ring_last: - .space (8) - .globl console_intr console_intr: - .space (8) + .space (8) ENTRY(sigcode) mv a0, sp addi a0, a0, SF_UC 1: li t0, SYS_sigreturn ecall /* sigreturn failed, exit */ li t0, SYS_exit ecall j 1b END(sigcode) /* This may be copied to the stack, keep it 16-byte aligned */ .align 3 esigcode: .data .align 3 .global szsigcode szsigcode: .quad esigcode - sigcode .align 12 .globl pagetable_l0 pagetable_l0: - .space PAGE_SIZE + .space (PAGE_SIZE * MAXCPU) pagetable_l1: .space PAGE_SIZE pagetable_l2: .space PAGE_SIZE pagetable_end: .globl init_pt_va init_pt_va: .quad pagetable_l2 /* XXX: Keep page tables VA */ + +#ifndef SMP +ENTRY(mpentry) +1: + wfi + j 1b +END(mpentry) +#else +/* + * mpentry(unsigned long) + * + * Called by a core when it is being brought online. + * The data in a0 is passed straight to init_secondary. + */ +ENTRY(mpentry) + /* + * Calculate the offset to __riscv_boot_ap + * for the current core (cpuid is in a0). 
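+ * __riscv_boot_ap[] holds one 32-bit flag per cpu, hence the
+ * stride of 4 bytes below.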
+ */ + li t1, 4 + mulw t1, t1, a0 + /* Get pointer */ + la t0, __riscv_boot_ap + add t0, t0, t1 + +1: + /* Wait for the kernel to be ready */ + lw t1, 0(t0) + beqz t1, 1b + + /* Setup machine exception vector */ + la t0, mentry + csrw mtvec, t0 + + /* Build event queue ring for this core */ + build_ring + + /* Set page tables base register */ + la t0, pagetable_l0 + li t1, PAGE_SIZE + mulw t1, t1, a0 + add t0, t0, t1 + csrw sptbr, t0 + /* Page tables END */ + + /* Configure mstatus */ + li s0, ((MSTATUS_VM_SV48 << MSTATUS_VM_SHIFT) | \ + (MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \ + (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \ + (MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT)); + csrw mstatus, s0 + + /* Setup stack for machine-mode exceptions */ + la t0, hardstack_end + li t1, HW_STACK_SIZE + mulw t1, t1, a0 + sub t0, t0, t1 + csrw mscratch, t0 + + li t0, 0 + csrw sscratch, t0 + + /* + * Enable machine-mode software interrupts + * so we can deliver IPI to this core. + */ + li t0, MIE_MSIE + csrs mie, t0 + + /* + * Exit from machine mode and go to + * the virtual address space. + */ + la t0, mp_virtdone + li s11, KERNBASE + add t0, t0, s11 + csrw mepc, t0 + eret + +mp_virtdone: + /* We are now in virtual address space */ + + /* Setup stack pointer */ + la t0, secondary_stacks + li t1, (PAGE_SIZE * KSTACK_PAGES) + mulw t1, t1, a0 + add sp, t0, t1 + + call init_secondary +END(mpentry) +#endif #include "exception.S" Index: head/sys/riscv/riscv/machdep.c =================================================================== --- head/sys/riscv/riscv/machdep.c (revision 295971) +++ head/sys/riscv/riscv/machdep.c (revision 295972) @@ -1,799 +1,805 @@ /*- * Copyright (c) 2014 Andrew Turner - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif struct pcpu __pcpu[MAXCPU]; +extern uint64_t pagetable_l0; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ extern int *end; extern int *initstack_end; struct pcpu *pcpup; uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs); uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs) { return (0); } static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sepc = frame->tf_sepc; regs->sstatus = frame->tf_sstatus; regs->ra = frame->tf_ra; regs->sp = frame->tf_sp; regs->gp = frame->tf_gp; regs->tp = frame->tf_tp; memcpy(regs->t, frame->tf_t, sizeof(regs->t)); memcpy(regs->s, frame->tf_s, sizeof(regs->s)); memcpy(regs->a, frame->tf_a, sizeof(regs->a)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sepc = regs->sepc; frame->tf_sstatus = regs->sstatus; frame->tf_ra = regs->ra; frame->tf_sp = regs->sp; frame->tf_gp = regs->gp; frame->tf_tp = regs->tp; memcpy(frame->tf_t, regs->t, sizeof(frame->tf_t)); memcpy(frame->tf_s, regs->s, sizeof(frame->tf_s)); memcpy(frame->tf_a, regs->a, sizeof(frame->tf_a)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { /* TODO */ bzero(regs, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { /* TODO */ return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set a0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. 
*/ tf->tf_a[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_ra = imgp->entry_addr; tf->tf_sepc = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct gpregs *)0)->gp_a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct gpregs *)0)->gp_s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct gpregs *)0)->gp_t); CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct reg *)0)->a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct reg *)0)->s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct reg *)0)->t); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; memcpy(mcp->mc_gpregs.gp_t, tf->tf_t, sizeof(mcp->mc_gpregs.gp_t)); memcpy(mcp->mc_gpregs.gp_s, tf->tf_s, sizeof(mcp->mc_gpregs.gp_s)); memcpy(mcp->mc_gpregs.gp_a, tf->tf_a, sizeof(mcp->mc_gpregs.gp_a)); if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_a[0] = 0; mcp->mc_gpregs.gp_t[0] = 0; /* clear syscall error */ } mcp->mc_gpregs.gp_ra = tf->tf_ra; mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_gp = tf->tf_gp; mcp->mc_gpregs.gp_tp = tf->tf_tp; mcp->mc_gpregs.gp_sepc = tf->tf_sepc; mcp->mc_gpregs.gp_sstatus = tf->tf_sstatus; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf; tf = td->td_frame; memcpy(tf->tf_t, mcp->mc_gpregs.gp_t, sizeof(tf->tf_t)); memcpy(tf->tf_s, mcp->mc_gpregs.gp_s, sizeof(tf->tf_s)); memcpy(tf->tf_a, mcp->mc_gpregs.gp_a, sizeof(tf->tf_a)); tf->tf_ra = mcp->mc_gpregs.gp_ra; tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_gp = mcp->mc_gpregs.gp_gp; tf->tf_tp = mcp->mc_gpregs.gp_tp; tf->tf_sepc = mcp->mc_gpregs.gp_sepc; tf->tf_sstatus = mcp->mc_gpregs.gp_sstatus; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { /* TODO */ } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { /* TODO */ } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "fence \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { panic("cpu_halt"); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. 
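 * Not implemented on RISC-V yet; the function below simply panics.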
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { + uint64_t addr; + addr = (uint64_t)&pagetable_l0; + addr += (cpuid * PAGE_SIZE); + + pcpu->pc_sptbr = addr; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) { td->td_md.md_spinlock_count = 1; td->td_md.md_saved_sstatus_ie = intr_disable(); } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t sstatus_ie; td = curthread; critical_exit(); sstatus_ie = td->td_md.md_saved_sstatus_ie; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(sstatus_ie); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { uint64_t sstatus; ucontext_t uc; int error; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ sstatus = uc.uc_mcontext.mc_gpregs.gp_sstatus; if ((sstatus & SSTATUS_PS) != 0 || (sstatus & SSTATUS_PIE) == 0) return (EINVAL); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { memcpy(pcb->pcb_t, tf->tf_t, sizeof(tf->tf_t)); memcpy(pcb->pcb_s, tf->tf_s, sizeof(tf->tf_s)); memcpy(pcb->pcb_a, tf->tf_a, sizeof(tf->tf_a)); pcb->pcb_ra = tf->tf_ra; pcb->pcb_sp = tf->tf_sp; pcb->pcb_gp = tf->tf_gp; pcb->pcb_tp = tf->tf_tp; pcb->pcb_sepc = tf->tf_sepc; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe *fp, frame; struct sysentvec *sysent; struct trapframe *tf; struct sigacts *psp; struct thread *td; struct proc *p; int onstack; int code; int sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. 
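 * If the copyout() fails, the process has trashed its stack and is
 * killed with SIGILL.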
*/ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_a[0] = sig; tf->tf_a[1] = (register_t)&fp->sf_si; tf->tf_a[2] = (register_t)&fp->sf_uc; tf->tf_sepc = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_ra = (register_t)sysent->sv_sigcode_base; else tf->tf_ra = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; printf("physmap[%d] = 0x%016lx\n", insert_idx, base); printf("physmap[%d] = 0x%016lx\n", insert_idx + 1, base + length); return (1); } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = (vm_offset_t)&fdt_static_dtb; if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { /* TODO */ } /* * Fake up a boot descriptor table. * RISCVTODO: This needs to be done via loader (when it's available). 
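 * Until then, the MODINFO_* records the loader would normally pass
 * via preload_metadata are synthesized by hand.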
*/ vm_offset_t fake_preload_metadata(struct riscv_bootparams *rvbp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf64 kernel") + 1; strcpy((char*)&fake_preload[i++], "elf64 kernel"); i += 3; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = (uint64_t)(KERNBASE + KERNENTRY); i += 1; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint64_t); printf("end is 0x%016lx\n", (uint64_t)&end); fake_preload[i++] = (uint64_t)&end - (uint64_t)(KERNBASE + KERNENTRY); i += 1; #ifdef DDB #if 0 /* RISCVTODO */ if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } void initriscv(struct riscv_bootparams *rvbp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_offset_t lastaddr; int mem_regions_sz; vm_size_t kernlen; uint32_t memsize; caddr_t kmdp; int i; /* Set the module data location */ lastaddr = fake_preload_metadata(rvbp); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = 0; kern_envp = NULL; #ifdef FDT try_load_dtb(kmdp); #endif /* Load the physical memory ranges */ physmap_idx = 0; /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); for (i = 0; i < mem_regions_sz; i++) add_physmap_entry(mem_regions[i].mr_start, mem_regions[i].mr_size, physmap, &physmap_idx); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* Set the pcpu pointer */ __asm __volatile("mv gp, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); pmap_bootstrap(rvbp->kern_l1pt, KERNENTRY, kernlen); cninit(); init_proc0(rvbp->kern_stack); /* set page table base register for thread0 */ thread0.td_pcb->pcb_l1addr = (rvbp->kern_l1pt - KERNBASE); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); kdb_init(); riscv_init_interrupts(); early_boot = 0; } Index: head/sys/riscv/riscv/mp_machdep.c =================================================================== --- head/sys/riscv/riscv/mp_machdep.c (nonexistent) +++ head/sys/riscv/riscv/mp_machdep.c (revision 295972) @@ -0,0 +1,453 @@ +/*- + * Copyright (c) 2015 The FreeBSD Foundation + * Copyright (c) 2016 Ruslan Bukin + * All rights reserved. + * + * Portions of this software were developed by Andrew Turner under + * sponsorship from the FreeBSD Foundation. 
+ * + * Portions of this software were developed by SRI International and the + * University of Cambridge Computer Laboratory under DARPA/AFRL contract + * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. + * + * Portions of this software were developed by the University of Cambridge + * Computer Laboratory as part of the CTSRD Project, with support from the + * UK Higher Education Innovation Fund (HEIF). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_kstack_pages.h" +#include "opt_platform.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#ifdef VFP +#include +#endif + +#ifdef FDT +#include +#include +#endif + +boolean_t ofw_cpu_reg(phandle_t node, u_int, cell_t *); + +extern struct pcpu __pcpu[]; + +uint32_t __riscv_boot_ap[MAXCPU]; + +static enum { + CPUS_UNKNOWN, +#ifdef FDT + CPUS_FDT, +#endif +} cpu_enum_method; + +static device_identify_t riscv64_cpu_identify; +static device_probe_t riscv64_cpu_probe; +static device_attach_t riscv64_cpu_attach; + +static int ipi_handler(void *); + +struct mtx ap_boot_mtx; +struct pcb stoppcbs[MAXCPU]; + +#ifdef INVARIANTS +static uint32_t cpu_reg[MAXCPU][2]; +#endif +static device_t cpu_list[MAXCPU]; + +void mpentry(unsigned long cpuid); +void init_secondary(uint64_t); + +uint8_t secondary_stacks[MAXCPU - 1][PAGE_SIZE * KSTACK_PAGES] __aligned(16); + +/* Set to 1 once we're ready to let the APs out of the pen. 
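+ * init_secondary() spins in wfi on this flag before finishing its
+ * bring-up.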
*/ +volatile int aps_ready = 0; + +/* Temporary variables for init_secondary() */ +void *dpcpu[MAXCPU - 1]; + +static device_method_t riscv64_cpu_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, riscv64_cpu_identify), + DEVMETHOD(device_probe, riscv64_cpu_probe), + DEVMETHOD(device_attach, riscv64_cpu_attach), + + DEVMETHOD_END +}; + +static devclass_t riscv64_cpu_devclass; +static driver_t riscv64_cpu_driver = { + "riscv64_cpu", + riscv64_cpu_methods, + 0 +}; + +DRIVER_MODULE(riscv64_cpu, cpu, riscv64_cpu_driver, riscv64_cpu_devclass, 0, 0); + +static void +riscv64_cpu_identify(driver_t *driver, device_t parent) +{ + + if (device_find_child(parent, "riscv64_cpu", -1) != NULL) + return; + if (BUS_ADD_CHILD(parent, 0, "riscv64_cpu", -1) == NULL) + device_printf(parent, "add child failed\n"); +} + +static int +riscv64_cpu_probe(device_t dev) +{ + u_int cpuid; + + cpuid = device_get_unit(dev); + if (cpuid >= MAXCPU || cpuid > mp_maxid) + return (EINVAL); + + device_quiet(dev); + return (0); +} + +static int +riscv64_cpu_attach(device_t dev) +{ + const uint32_t *reg; + size_t reg_size; + u_int cpuid; + int i; + + cpuid = device_get_unit(dev); + + if (cpuid >= MAXCPU || cpuid > mp_maxid) + return (EINVAL); + KASSERT(cpu_list[cpuid] == NULL, ("Already have cpu %u", cpuid)); + + reg = cpu_get_cpuid(dev, ®_size); + if (reg == NULL) + return (EINVAL); + + if (bootverbose) { + device_printf(dev, "register <"); + for (i = 0; i < reg_size; i++) + printf("%s%x", (i == 0) ? "" : " ", reg[i]); + printf(">\n"); + } + + /* Set the device to start it later */ + cpu_list[cpuid] = dev; + + return (0); +} + +static void +release_aps(void *dummy __unused) +{ + int cpu, i; + + if (mp_ncpus == 1) + return; + + /* Setup the IPI handler */ + riscv_setup_ipihandler(ipi_handler); + + atomic_store_rel_int(&aps_ready, 1); + + printf("Release APs\n"); + + for (i = 0; i < 2000; i++) { + if (smp_started) { + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + } + return; + } + DELAY(1000); + } + + printf("APs not started\n"); +} +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); + +void +init_secondary(uint64_t cpu) +{ + struct pcpu *pcpup; + + /* Setup the pcpu pointer */ + pcpup = &__pcpu[cpu]; + __asm __volatile("mv gp, %0" :: "r"(pcpup)); + + /* Spin until the BSP releases the APs */ + while (!aps_ready) + __asm __volatile("wfi"); + + /* Initialize curthread */ + KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); + pcpup->pc_curthread = pcpup->pc_idlethread; + pcpup->pc_curpcb = pcpup->pc_idlethread->td_pcb; + + /* + * Identify current CPU. This is necessary to setup + * affinity registers and to provide support for + * runtime chip identification. + */ + identify_cpu(); + + /* Enable software interrupts */ + riscv_unmask_ipi(); + + /* Start per-CPU event timers. 
*/ + cpu_initclocks_ap(); + +#ifdef VFP + /* TODO: init FPU */ +#endif + + /* Enable interrupts */ + intr_enable(); + + mtx_lock_spin(&ap_boot_mtx); + + atomic_add_rel_32(&smp_cpus, 1); + + if (smp_cpus == mp_ncpus) { + /* enable IPIs, tlb shootdown, freezes etc */ + atomic_store_rel_int(&smp_started, 1); + } + + mtx_unlock_spin(&ap_boot_mtx); + + /* Enter the scheduler */ + sched_throw(NULL); + + panic("scheduler returned us to init_secondary"); + /* NOTREACHED */ +} + +static int +ipi_handler(void *arg) +{ + u_int ipi_bitmap; + u_int cpu, ipi; + int bit; + + /* + * We have a shared interrupt line for both IPI and HTIF, + * so we don't really need to clear the pending bit here + * as it will be cleared later in htif_intr. + * But let's assume HTIF is an optional part, so clear the + * pending bit if there are no new entries in htif_ring. + */ + machine_command(ECALL_CLEAR_IPI, 0); + + cpu = PCPU_GET(cpuid); + + mb(); + + ipi_bitmap = atomic_readandclear_int(PCPU_PTR(pending_ipis)); + if (ipi_bitmap == 0) + return (FILTER_HANDLED); + + while ((bit = ffs(ipi_bitmap))) { + bit = (bit - 1); + ipi = (1 << bit); + ipi_bitmap &= ~ipi; + + mb(); + + switch (ipi) { + case IPI_AST: + CTR0(KTR_SMP, "IPI_AST"); + break; + case IPI_PREEMPT: + CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__); + sched_preempt(curthread); + break; + case IPI_RENDEZVOUS: + CTR0(KTR_SMP, "IPI_RENDEZVOUS"); + smp_rendezvous_action(); + break; + case IPI_STOP: + case IPI_STOP_HARD: + CTR0(KTR_SMP, (ipi == IPI_STOP) ? "IPI_STOP" : "IPI_STOP_HARD"); + savectx(&stoppcbs[cpu]); + + /* Indicate we are stopped */ + CPU_SET_ATOMIC(cpu, &stopped_cpus); + + /* Wait for restart */ + while (!CPU_ISSET(cpu, &started_cpus)) + cpu_spinwait(); + + CPU_CLR_ATOMIC(cpu, &started_cpus); + CPU_CLR_ATOMIC(cpu, &stopped_cpus); + CTR0(KTR_SMP, "IPI_STOP (restart)"); + break; + case IPI_HARDCLOCK: + CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); + hardclockintr(); + break; + default: + panic("Unknown IPI %#0x on cpu %d", ipi, curcpu); + } + } + + return (FILTER_HANDLED); +} + +struct cpu_group * +cpu_topo(void) +{ + + return (smp_topo_none()); +} + +/* Determine if we are running on an MP machine */ +int +cpu_mp_probe(void) +{ + + return (mp_ncpus > 1); +} + +#ifdef FDT +static boolean_t +cpu_init_fdt(u_int id, phandle_t node, u_int addr_size, pcell_t *reg) +{ + uint64_t target_cpu; + struct pcpu *pcpup; + + /* Check we are able to start this cpu */ + if (id > mp_maxid) + return (0); + + KASSERT(id < MAXCPU, ("Too many CPUs")); + + KASSERT(addr_size == 1 || addr_size == 2, ("Invalid register size")); +#ifdef INVARIANTS + cpu_reg[id][0] = reg[0]; + if (addr_size == 2) + cpu_reg[id][1] = reg[1]; +#endif + + target_cpu = reg[0]; + if (addr_size == 2) { + target_cpu <<= 32; + target_cpu |= reg[1]; + } + + pcpup = &__pcpu[id]; + + /* We are already running on cpu 0 */ + if (id == 0) { + pcpup->pc_reg = target_cpu; + return (1); + } + + pcpu_init(pcpup, id, sizeof(struct pcpu)); + pcpup->pc_reg = target_cpu; + + dpcpu[id - 1] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, + M_WAITOK | M_ZERO); + dpcpu_init(dpcpu[id - 1], id); + + printf("Starting CPU %u (%lx)\n", id, target_cpu); + __riscv_boot_ap[id] = 1; + + CPU_SET(id, &all_cpus); + + return (1); +} +#endif + +/* Initialize and fire up non-boot processors */ +void +cpu_mp_start(void) +{ + + mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); + + CPU_SET(0, &all_cpus); + + switch(cpu_enum_method) { +#ifdef FDT + case CPUS_FDT: + ofw_cpu_early_foreach(cpu_init_fdt, true); + break; +#endif + case CPUS_UNKNOWN: + break; + } +} + +/* 
Introduce rest of cores to the world */ +void +cpu_mp_announce(void) +{ +} + +void +cpu_mp_setmaxid(void) +{ +#ifdef FDT + int cores; + + cores = ofw_cpu_early_foreach(NULL, false); + if (cores > 0) { + cores = MIN(cores, MAXCPU); + if (bootverbose) + printf("Found %d CPUs in the device tree\n", cores); + mp_ncpus = cores; + mp_maxid = cores - 1; + cpu_enum_method = CPUS_FDT; + return; + } +#endif + + if (bootverbose) + printf("No CPU data, limiting to 1 core\n"); + mp_ncpus = 1; + mp_maxid = 0; +} Property changes on: head/sys/riscv/riscv/mp_machdep.c ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: head/sys/riscv/riscv/pmap.c =================================================================== --- head/sys/riscv/riscv/pmap.c (revision 295971) +++ head/sys/riscv/riscv/pmap.c (revision 295972) @@ -1,3197 +1,3195 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Portions of this software were developed by Andrew Turner under * sponsorship from The FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NUPDE (NPDEPG * NPDEPG) #define NUSERPGTBLS (NUPDE + NPDEPG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; -extern uint64_t pagetable_l0; - /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. 
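 * Hence the atomic swap/set wrappers defined below.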
*/ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } static __inline void pagezero(void *p) { bzero(p, PAGE_SIZE); } #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) #define PTE_TO_PHYS(pte) ((pte >> PTE_PPN0_S) * PAGE_SIZE) static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { vm_paddr_t phys; pd_entry_t *l2; phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if ((pmap_load(l1) & PTE_VALID) == 0) return (NULL); if ((pmap_load(l1) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { vm_paddr_t phys; pt_entry_t *l3; phys = PTE_TO_PHYS(pmap_load(l2)); l3 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l3[pmap_l3_index(va)]); } static __inline pt_entry_t * pmap_l3(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2; l2 = pmap_l2(pmap, va); if (l2 == NULL) return (NULL); if ((pmap_load(l2) & PTE_VALID) == 0) return (NULL); if (l2 == NULL || (pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) return (NULL); return (pmap_l2_to_l3(l2, va)); } static __inline int pmap_is_write(pt_entry_t entry) { if (entry & (1 << PTE_TYPE_S)) return (1); return (0); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { return (l3 & PTE_VALID); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { /* TODO */ return (0); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* Checks if the page is dirty. 
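 *
 * First, though, an illustrative composition of the walkers above: a
 * hypothetical helper (not part of this file) that resolves a VA to a
 * PA in software, much as pmap_extract() does further down.
 */
#if 0
static vm_paddr_t
pmap_walk_sketch(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *l3;

	/* pmap_l3() walks L1 -> L2 -> L3 and returns NULL on any hole. */
	l3 = pmap_l3(pmap, va);
	if (l3 == NULL || !pmap_l3_valid(pmap_load(l3)))
		return (0);
	/* (L2 superpages ignored for brevity; see pmap_extract().) */
	/* Leaf entry: recover the frame, then add the page offset. */
	return (PTE_TO_PHYS(pmap_load(l3)) | (va & L3_OFFSET));
}
#endif
/*
 * The dirty check itself: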
*/ static inline int pmap_page_dirty(pt_entry_t pte) { return (pte & PTE_DIRTY); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ KASSERT((l1[*l1_slot] & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; u_int ret; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); return (ret); } static void pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) { vm_offset_t va; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; pt_entry_t entry; u_int pn; pa = kernstart & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); for (; va < DMAP_MAX_ADDRESS; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); /* superpages */ pn = ((pa >> L1_SHIFT) & Ln_ADDR_MASK); entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); entry |= (pn << PTE_PPN2_S); pmap_load_store(&l1[l1_slot], entry); } cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); cpu_tlb_flushID(); } /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); /* Assume the address we were loaded to is a valid physical address */ min_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; break; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialize phys_avail by copying from physmap * up to the physical address KERNBASE points at. 
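 *
 * (As a purely hypothetical example: with a single physmap range of
 * 1GB at physical 0 and the kernel loaded at physical 0x200000, the
 * loop below seeds phys_avail[] from physmap[], and the fixup after it
 * accounts separately for the region below the kernel image.)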
*/ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { if ((l2[l2_slot] & PTE_VALID) == 0) break; /* Check locore used L2 superpages */ KASSERT((l2[l2_slot] & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S), ("Invalid bootstrap L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L2_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L2_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; /* Have we used the current range? */ if (physmap[map_slot + 1] <= pa) continue; /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. 
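 *
 * On a multiprocessor these would also have to shoot down remote TLBs;
 * here each simply issues a local "sfence.vm". Purely as an
 * illustration (not part of this file), a ranged flush could bound its
 * per-page work and fall back to a full flush past a cutoff:
 */
#if 0
#define	PMAP_INVALIDATE_MAX	16	/* hypothetical cutoff */

static void
pmap_invalidate_range_sketch(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va;

	if (((eva - sva) >> PAGE_SHIFT) > PMAP_INVALIDATE_MAX) {
		pmap_invalidate_all(pmap);	/* cheaper past the cutoff */
		return;
	}
	for (va = sva; va < eva; va += PAGE_SIZE)
		pmap_invalidate_page(pmap, va);
}
#endif
/*
 * The uniprocessor versions used for now: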
*/ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; pa = 0; PMAP_LOCK(pmap); /* * Start with the l2 tabel. We are unable to allocate * pages in the l1 table. */ l2p = pmap_l2(pmap, va); if (l2p != NULL) { l2 = pmap_load(l2p); if ((l2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S)) { l3p = pmap_l2_to_l3(l2p, va); if (l3p != NULL) { l3 = pmap_load(l3p); pa = PTE_TO_PHYS(l3); pa |= (va & L3_OFFSET); } } else { /* L2 is superpages */ pa = (l2 >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) { phys = PTE_TO_PHYS(l3); if (vm_page_pa_tryrelock(pmap, phys, &pa)) goto retry; m = PHYS_TO_VM_PAGE(phys); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { l2 = pmap_l2(kernel_pmap, va); if (l2 == NULL) panic("pmap_kextract: No l2"); if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) { /* superpages */ pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); return (pa); } l3 = pmap_l2_to_l3(l2, va); if (l3 == NULL) panic("pmap_kextract: No l3..."); pa = PTE_TO_PHYS(pmap_load(l3)); pa |= (va & PAGE_MASK); } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { pt_entry_t *l3; vm_offset_t va; panic("%s: implement me\n", __func__); KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); panic("%s: unimplemented", __func__); #if 0 /* implement me */ pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT | ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); #endif PTE_SYNC(l3); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. 
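 *
 * For reference, the enter-side counterpart (a hypothetical
 * pmap_kenter(), not present in this file) would build the PTE the
 * same way pmap_qenter() does below:
 */
#if 0
static void
pmap_kenter_sketch(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *l3, entry;
	u_int pn;

	l3 = pmap_l3(kernel_pmap, va);
	KASSERT(l3 != NULL, ("pmap_kenter_sketch: no L3 entry for va"));
	pn = (pa / PAGE_SIZE);
	entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
	entry |= (pn << PTE_PPN0_S);
	pmap_load_store(l3, entry);
	PTE_SYNC(l3);
	pmap_invalidate_page(kernel_pmap, va);
}
#endif
/*
 * The remove side: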
*/ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *l3; l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); if (pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *l3; vm_offset_t va; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pmap_load_clear(l3); PTE_SYNC(l3); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *l3, pa; vm_offset_t va; vm_page_t m; pt_entry_t entry; u_int pn; int i; va = sva; for (i = 0; i < count; i++) { m = ma[i]; pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); l3 = pmap_l3(kernel_pmap, va); entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); entry |= (pn << PTE_PPN0_S); pmap_load_store(l3, entry); PTE_SYNC(l3); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *l3; vm_offset_t va; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); if (pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. 
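 *
 * In sketch form (illustrative only), the callers' protocol is: collect
 * pages on a local spglist while entries are torn down, and free them
 * only once the relevant TLB entries are gone.
 */
#if 0
	struct spglist free;

	SLIST_INIT(&free);
	/* ... removal path queues empty page table pages on &free ... */
	pmap_invalidate_page(pmap, va);	/* TLB first... */
	pmap_free_zero_pages(&free);	/* ...pages second */
#endif
/*
 * The list is maintained with the helper below.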
*/ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else { return (FALSE); } } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= NUPDE) { /* PD page */ pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { /* PTE page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { pd_entry_t *l1; /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pmap_unwire_l3(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_paddr_t phys; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); phys = PTE_TO_PHYS(ptepde); mpte = PHYS_TO_VM_PAGE(phys); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; } int pmap_pinit(pmap_t pmap) { vm_paddr_t l1phys; vm_page_t l1pt; /* * allocate the l1 page */ while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, /*pdppg, */pdpg; pt_entry_t entry; vm_paddr_t phys; int pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. 
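 *
 * (ptepindex encodes the level: values below NUPDE name L3 pages, one
 * per L2_SIZE of user VA, while NUPDE + l1index names the L2 page
 * covering the matching L1 slot, hence the two branches below.)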
*/ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= NUPDE) { pd_entry_t *l1; vm_pindex_t l1index; l1index = ptepindex - NUPDE; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); entry |= (pn << PTE_PPN0_S); pmap_load_store(l1, entry); PTE_SYNC(l1); } else { vm_pindex_t l1index; pd_entry_t *l1, *l2; l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUPDE + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pdpg->wire_count++; } phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); entry |= (pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *l2; vm_paddr_t phys; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); m = PHYS_TO_VM_PAGE(phys); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. 
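 *
 * In sketch form (illustrative only), the lifecycle pairing looks like:
 */
#if 0
	struct pmap p;

	pmap_pinit(&p);		/* allocate and install the L1 page */
	/* ... pmap_enter()/pmap_remove() traffic while the map is live ... */
	pmap_remove_pages(&p);	/* drop managed mappings at process exit */
	pmap_release(&p);	/* free the now-empty L1 page */
#endif
/*
 * The release side: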
*/ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l1, *l2; pt_entry_t entry; int pn; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { l1 = pmap_l1(kernel_pmap, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); panic("%s: implement grow l1\n", __func__); #if 0 pmap_load_store(l1, paddr | L1_TABLE); #endif PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); if ((pmap_load(l2) & PTE_REF) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); entry |= (pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. 
***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("RISCVTODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. 
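 *
 * (A chunk carries _NPCPV == 168 entries, split 64 + 64 + 40 across the
 * three pc_map words, which is why PC_FREE2 sets only its low 40 bits.
 * Keeping partially free chunks at the head lets get_pv_entry() find a
 * spare slot without scanning the whole list.)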
*/ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); #if 0 /* TODO: For minidump */ dump_drop_page(m->phys_addr); #endif vm_page_unwire(m, PQ_INACTIVE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); #if 0 /* TODO: This is for minidump */ dump_add_page(m->phys_addr); #endif pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. 
*/ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_paddr_t phys; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & PTE_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & PTE_SW_MANAGED) { phys = PTE_TO_PHYS(old_l3); m = PHYS_TO_VM_PAGE(phys); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); if (old_l3 & PTE_REF) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l1, *l2; pt_entry_t l3_pte, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_pte = pmap_load(l2); /* * Weed out invalid mappings. */ if (l3_pte == 0) continue; if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. 
*/ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t *l3, tl3; pd_entry_t *l2, tl2; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); tl2 = pmap_load(l2); KASSERT((tl2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), ("pmap_remove_all: found a table when expecting " "a block in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); tl3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, pv->pv_va); if (tl3 & PTE_SW_WIRED) pmap->pm_stats.wired_count--; if ((tl3 & PTE_REF) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pmap_page_dirty(tl3)) vm_page_dirty(m); pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. 
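 *
 * Under the PTE type encoding this file assumes, write permission is
 * carried by bit PTE_TYPE_S of the entry: the writable leaf types are
 * the odd ones, so clearing that single bit maps each writable type
 * onto its read-only counterpart. An illustrative check (not part of
 * this file):
 */
#if 0
	pt_entry_t pte;

	pte = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S));
	KASSERT(pmap_is_write(pte), ("SRWX must test writable"));
	pte &= ~(1 << PTE_TYPE_S);	/* SRWX -> read-only variant */
	KASSERT(!pmap_is_write(pte), ("write bit must now be clear"));
#endif
/*
 * pmap_protect() below relies on exactly that single-bit downgrade.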
*/ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; pd_entry_t *l1, *l2; pt_entry_t *l3p, l3; pt_entry_t entry; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { entry = pmap_load(l3p); entry &= ~(1 << PTE_TYPE_S); pmap_load_store(l3p, entry); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *l1, *l2; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; boolean_t nosleep; pt_entry_t entry; int l2_pn; int l3_pn; int pn; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); new_l3 = PTE_VALID; if ((prot & VM_PROT_WRITE) == 0) { /* Read-only */ if ((va >> 63) == 0) /* USER */ new_l3 |= (PTE_TYPE_SURX << PTE_TYPE_S); else /* KERNEL */ new_l3 |= (PTE_TYPE_SRX << PTE_TYPE_S); } else { if ((va >> 63) == 0) /* USER */ new_l3 |= (PTE_TYPE_SURWX << PTE_TYPE_S); else /* KERNEL */ new_l3 |= (PTE_TYPE_SRWX << PTE_TYPE_S); } new_l3 |= (pn << PTE_PPN0_S); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= PTE_SW_WIRED; CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } l3 = pmap_l3(pmap, va); } else { l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { l2 = pmap_l2(pmap, va); if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); l2_pn = (l2_pa / PAGE_SIZE); l1 = pmap_l1(pmap, va); entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); entry |= (l2_pn << PTE_PPN0_S); pmap_load_store(l1, entry); PTE_SYNC(l1); l2 = pmap_l1_to_l2(l1, va); } KASSERT(l2 != NULL, ("No l2 table after allocating one")); l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); l3_pn = (l3_pa / PAGE_SIZE); entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); entry |= (l3_pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); l3 = pmap_l2_to_l3(l2, va); } pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = PTE_TO_PHYS(orig_l3); /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & PTE_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & PTE_SW_MANAGED) != 0) { new_l3 |= PTE_SW_MANAGED; if (pmap_is_write(new_l3)) vm_page_aflag_set(m, PGA_WRITEABLE); } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= PTE_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (pmap_is_write(new_l3)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. 
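 *
 * (pmap_load_store() hands back the previous PTE, so any referenced or
 * dirty state the hardware recorded in the old mapping is folded into
 * the vm_page below before that mapping is lost.)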
*/ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = PTE_TO_PHYS(orig_l3); if (opa != pa) { if ((orig_l3 & PTE_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); if ((orig_l3 & PTE_REF) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & PTE_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; vm_paddr_t phys; pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; pt_entry_t entry; int pn; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. 
Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); mpte = PHYS_TO_VM_PAGE(phys); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; l3 = pmap_l3(kernel_pmap, va); } if (l3 == NULL) panic("pmap_enter_quick_locked: No l3"); if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); /* RISCVTODO: check permissions */ entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); entry |= (pn << PTE_PPN0_S); /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) entry |= PTE_SW_MANAGED; pmap_load_store(l3, entry); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)*l3); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. 
*/ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. 
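 *
 * (Note the lock dance used below and in the other pv-list walkers: the
 * pv list lock is taken first and the pmap lock only via PMAP_TRYLOCK().
 * On failure, both are reacquired in a safe order and m->md.pv_gen
 * reveals whether the pv list changed meanwhile, forcing a restart.)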
*/ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; pt_entry_t *l3; pv_entry_t pv; int count, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde, *l2; pt_entry_t *l3, tl3; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; l2 = pmap_l2(pmap, pv->pv_va); ptepde = pmap_load(l2); l3 = pmap_l2_to_l3(l2, pv->pv_va); tl3 = pmap_load(l3); /* * We cannot remove wired pages from a process' mapping at this time */ if (tl3 & PTE_SW_WIRED) { allfree = 0; continue; } pa = PTE_TO_PHYS(tl3); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tl3)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad l3 %#jx", (uintmax_t)tl3)); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. 
*/ if (pmap_page_dirty(tl3)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. */ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; pt_entry_t *l3, mask, value; pmap_t pmap; int md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); mask = 0; value = 0; if (modified) { mask |= PTE_DIRTY; value |= PTE_DIRTY; } if (accessed) { mask |= PTE_REF; value |= PTE_REF; } #if 0 if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } #endif rv = (pmap_load(l3) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *l3; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); l3 = pmap_l3(pmap, addr); if (l3 != NULL && pmap_load(l3) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. 
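 *
 * The downgrade below uses atomic_cmpset_long() in a retry loop rather
 * than a blind store: the hardware may be setting referenced/dirty bits
 * in the same PTE concurrently, and a compare-and-set retried on
 * failure preserves whatever bits appeared since the entry was read.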
*/ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; pt_entry_t *l3, oldl3; pt_entry_t newl3; int md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } l3 = pmap_l3(pmap, pv->pv_va); retry: oldl3 = pmap_load(l3); if (pmap_is_write(oldl3)) { newl3 = oldl3 & ~(1 << PTE_TYPE_S); if (!atomic_cmpset_long(l3, oldl3, newl3)) goto retry; /* TODO: use pmap_page_dirty(oldl3) ? */ if ((oldl3 & PTE_REF) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; int cleared, md_gen, not_cleared; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } l2 = pmap_l2(pmap, pv->pv_va); KASSERT((pmap_load(l2) & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); if ((pmap_load(l3) & PTE_REF) != 0) { if (safe_to_clear_referenced(pmap, pmap_load(l3))) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("RISCVTODO: safe_to_clear_referenced\n"); } else if ((pmap_load(l3) & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. 
*/ pmap_remove_l3(pmap, l3, pv->pv_va, pmap_load(l2), &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* RISCVTODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * RISCVTODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("RISCVTODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { panic("RISCVTODO: pmap_mincore"); } void pmap_activate(struct thread *td) { uint64_t entry; uint64_t pn; pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); pn = (td->td_pcb->pcb_l1addr / PAGE_SIZE); entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); entry |= (pn << PTE_PPN0_S); - pmap_load_store(&pagetable_l0, entry); + pmap_load_store((uint64_t *)PCPU_GET(sptbr), entry); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { panic("RISCVTODO: pmap_sync_icache"); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. 
* * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); } } } Index: head/sys/riscv/riscv/swtch.S =================================================================== --- head/sys/riscv/riscv/swtch.S (revision 295971) +++ head/sys/riscv/riscv/swtch.S (revision 295972) @@ -1,282 +1,286 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "assym.s" #include "opt_sched.h" #include #include #include #include __FBSDID("$FreeBSD$"); /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) sfence.vm /* Switch to the new pmap */ - la t0, pagetable_l0 ld t1, PCB_L1ADDR(x13) /* Link to next level PN */ srli t1, t1, PAGE_SHIFT /* PN no */ li t2, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) slli t3, t1, PTE_PPN0_S /* (t1 << PTE_PPN0_S) */ or t4, t2, t3 /* Store single level0 PTE entry to position */ + ld t0, PC_SPTBR(gp) sd t4, 0(t0) /* TODO: Invalidate the TLB */ sfence.vm /* Load registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) ret .Lcpu_throw_panic_str: .asciz "cpu_throw: %p\0" END(cpu_throw) /* * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) * * a0 = old * a1 = new * a2 = mtx * x3 to x7, x16 and x17 are caller saved */ ENTRY(cpu_switch) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) /* Save the old context. */ ld x13, TD_PCB(a0) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(x13) sd sp, (PCB_SP)(x13) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(x13) sd s1, (PCB_S + 1 * 8)(x13) sd s2, (PCB_S + 2 * 8)(x13) sd s3, (PCB_S + 3 * 8)(x13) sd s4, (PCB_S + 4 * 8)(x13) sd s5, (PCB_S + 5 * 8)(x13) sd s6, (PCB_S + 6 * 8)(x13) sd s7, (PCB_S + 7 * 8)(x13) sd s8, (PCB_S + 8 * 8)(x13) sd s9, (PCB_S + 9 * 8)(x13) sd s10, (PCB_S + 10 * 8)(x13) sd s11, (PCB_S + 11 * 8)(x13) /* * Restore the saved context. */ ld x13, TD_PCB(a1) /* * TODO: We may need to flush the cache here if switching * to a user process. 
*/ sfence.vm /* Switch to the new pmap */ - la t0, pagetable_l0 ld t1, PCB_L1ADDR(x13) /* Link to next level PN */ srli t1, t1, PAGE_SHIFT /* PN no */ li t2, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) slli t3, t1, PTE_PPN0_S /* (t1 << PTE_PPN0_S) */ or t4, t2, t3 /* Store single level0 PTE entry to position */ + ld t0, PC_SPTBR(gp) sd t4, 0(t0) /* TODO: Invalidate the TLB */ sfence.vm /* Release the old thread */ sd a2, TD_LOCK(a0) #if defined(SCHED_ULE) && defined(SMP) - /* TODO */ + /* Spin if TD_LOCK points to a blocked_lock */ + la a2, _C_LABEL(blocked_lock) +1: + ld t0, TD_LOCK(a1) + beq t0, a2, 1b #endif /* Restore the registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) ret .Lcpu_switch_panic_str: .asciz "cpu_switch: %p\0" END(cpu_switch) /* * fork_exit(void (*callout)(void *, struct trapframe *), void *arg, * struct trapframe *frame) */ ENTRY(fork_trampoline) mv a0, s0 mv a1, s1 mv a2, sp call _C_LABEL(fork_exit) /* Restore sstatus */ ld t0, (TF_SSTATUS)(sp) /* Ensure interrupts disabled */ li t1, ~SSTATUS_IE and t0, t0, t1 csrw sstatus, t0 /* Restore exception program counter */ ld t0, (TF_SEPC)(sp) csrw sepc, t0 /* Restore the registers */ ld t0, (TF_T + 0 * 8)(sp) ld t1, (TF_T + 1 * 8)(sp) ld t2, (TF_T + 2 * 8)(sp) ld t3, (TF_T + 3 * 8)(sp) ld t4, (TF_T + 4 * 8)(sp) ld t5, (TF_T + 5 * 8)(sp) ld t6, (TF_T + 6 * 8)(sp) ld s0, (TF_S + 0 * 8)(sp) ld s1, (TF_S + 1 * 8)(sp) ld s2, (TF_S + 2 * 8)(sp) ld s3, (TF_S + 3 * 8)(sp) ld s4, (TF_S + 4 * 8)(sp) ld s5, (TF_S + 5 * 8)(sp) ld s6, (TF_S + 6 * 8)(sp) ld s7, (TF_S + 7 * 8)(sp) ld s8, (TF_S + 8 * 8)(sp) ld s9, (TF_S + 9 * 8)(sp) ld s10, (TF_S + 10 * 8)(sp) ld s11, (TF_S + 11 * 8)(sp) ld a0, (TF_A + 0 * 8)(sp) ld a1, (TF_A + 1 * 8)(sp) ld a2, (TF_A + 2 * 8)(sp) ld a3, (TF_A + 3 * 8)(sp) ld a4, (TF_A + 4 * 8)(sp) ld a5, (TF_A + 5 * 8)(sp) ld a6, (TF_A + 6 * 8)(sp) ld a7, (TF_A + 7 * 8)(sp) /* Load user ra and sp */ ld tp, (TF_TP)(sp) ld ra, (TF_RA)(sp) /* * Store our pcpup on stack, we will load it back * on kernel mode trap. */ sd gp, (TF_SIZE)(sp) ld gp, (TF_GP)(sp) /* Save kernel stack so we can use it doing a user trap */ addi sp, sp, TF_SIZE csrw sscratch, sp /* Load user stack */ ld sp, (TF_SP - TF_SIZE)(sp) eret END(fork_trampoline) ENTRY(savectx) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(a0) sd sp, (PCB_SP)(a0) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(a0) sd s1, (PCB_S + 1 * 8)(a0) sd s2, (PCB_S + 2 * 8)(a0) sd s3, (PCB_S + 3 * 8)(a0) sd s4, (PCB_S + 4 * 8)(a0) sd s5, (PCB_S + 5 * 8)(a0) sd s6, (PCB_S + 6 * 8)(a0) sd s7, (PCB_S + 7 * 8)(a0) sd s8, (PCB_S + 8 * 8)(a0) sd s9, (PCB_S + 9 * 8)(a0) sd s10, (PCB_S + 10 * 8)(a0) sd s11, (PCB_S + 11 * 8)(a0) /* Store the VFP registers */ #ifdef VFP /* TODO */ #endif ret END(savectx)